Rating Prediction of Cafe on Google Maps¶

Datasets¶

In [1]:
import os
import json
import gzip
from functools import partial
from datetime import datetime, timezone
import re
from collections import defaultdict
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from shapely.geometry import shape, Point
from shapely.prepared import prep
from functools import lru_cache

import requests
import numpy as np
import pandas as pd
import seaborn as sns
import tqdm

import ast
import geopandas as gpd
from shapely.geometry import Point

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import r2_score

torch.manual_seed(0)
Out[1]:
<torch._C.Generator at 0x3094f9230>

Downloading Dataset¶

In [2]:
meta_path = "./datasets/raw/meta-California.json.gz"
meta_keys = ["gmap_id", "name", "latitude", "longitude", "category", "avg_rating", "num_of_reviews", "price", "hours"]

review_path = "./datasets/raw/review-California.json.gz"
review_keys = ["gmap_id", "user_id", "name", "time", "rating"]

total_reviews = 70529977
In [3]:
def download_meta_data():
    url = "https://mcauleylab.ucsd.edu/public_datasets/gdrive/googlelocal/meta-California.json.gz"
    res = requests.get(url, stream=True)

    with open(meta_path, "wb") as f:
        f.write(res.content)
In [4]:
def download_review_data():
    url = "https://mcauleylab.ucsd.edu/public_datasets/gdrive/googlelocal/review-California.json.gz"
    res = requests.get(url, stream=True)

    with open(review_path, "wb") as f:
        f.write(res.content)
In [5]:
os.makedirs("./datasets/raw", exist_ok=True)
os.makedirs("./datasets/processed", exist_ok=True)

if not os.path.exists(meta_path):
    download_meta_data()

if not os.path.exists("./datasets/raw/review-California.json.gz"):
    download_review_data()

Processing Dataset¶

In [6]:
def parse(path):
    g = gzip.open(path, "r")
    for l in g:
        yield json.loads(l)

Processing business data to extract Cafes we want to focus on.

In [7]:
# I reused my code from COGS108 project to process dataset.

def get_cafe_categories():
    categories = []
    for business in parse(meta_path):
        if business["category"] is not None:
            categories += business["category"]

    categories = np.array(categories)
    unique = np.unique(categories)

    cafe_categories = [str(category) for category in unique if "cafe" in category.lower() or "coffee" in category.lower()]
    print(f"The number of categories containting 'cafe' substring is {len(cafe_categories)}")
    print(cafe_categories)

    with open(f"./datasets/processed/cafe_categories.txt", "w") as f:
        f.write("\n".join(cafe_categories))

def filter_by_category(data, categories):
    category = data.get("category", None)
    if category is None:
        return False

    return len(set(category) & categories) != 0

def filter_by_num_reviews(data, min_num_reviews):
    return data["num_of_reviews"] >= min_num_reviews

def filter_raw_business_data(filters):
    businesses = []
    for business in parse(meta_path):
        if all([f(data=business) for f in filters]):
            business = {key: business.get(key, None) for key in meta_keys}
            businesses.append(business)

    print(f"We obtained total of {len(businesses)} after filtering")

    df = pd.DataFrame(businesses)
    df.to_csv(f"./datasets/processed/cafes.csv", index=False)
In [8]:
if not os.path.exists("./datasets/processed/cafe_categories.txt"):
        get_cafe_categories()

if not os.path.exists("./datasets/processed/cafes.csv"):
    min_num_reviews = 100

    with open("./datasets/processed/cafe_categories.txt", "r") as f:
        cafe_categories = set(f.read().split("\n"))

    cafe_filter = partial(filter_by_category, categories=cafe_categories)
    num_reviews_filter = partial(filter_by_num_reviews, min_num_reviews=min_num_reviews)

    filter_raw_business_data([cafe_filter, num_reviews_filter])

Processing review data to extract reviews we want to focus on.

In [9]:
# I reused my code from COGS108 project to process dataset.

def filter_by_gmap_id(data, gmap_ids):
    gmap_id = data.get("gmap_id", None)
    if gmap_id is None:
        return False

    return gmap_id in gmap_ids

def filter_raw_review_data(filters):
    reviews = []

    for review in tqdm.tqdm(parse(review_path), total=total_reviews):
        if all([f(data=review) for f in filters]):
            review = {key: review.get(key, None) for key in review_keys}
            review["review_id"] = f"{review['user_id']}_{review['gmap_id']}"
            reviews.append(review)

    print(f"We obtained total of {len(reviews)} after filtering")
    df = pd.DataFrame(reviews)
    df.to_csv("./datasets/raw/cafe_reviews.csv", index=False)

def extract_user_ids(reviews, min_num_reviews):
    user_ids = reviews["user_id"].dropna().values
    unique, counts = np.unique(np.array(user_ids), return_counts=True)
    users = pd.DataFrame({"user_id": unique, "num_reviews": counts})

    users = users[users["num_reviews"] >= min_num_reviews].reset_index(drop=True)
    print(f"We extracted {users.shape[0]} users after filtering.")

    users.to_csv("./datasets/processed/users.csv", index=False)

def filter_by_user_ids(reviews, user_ids):
    reviews = reviews[reviews["user_id"].isin(user_ids)]

    print(f"We extracted {reviews.shape[0]} reviews after filtering.")
    reviews.to_csv("./datasets/processed/reviews.csv", index=False)
In [10]:
if not os.path.exists("./datasets/raw/cafe_reviews.csv"):
    gmap_ids = set(pd.read_csv("./datasets/processed/cafes.csv")["gmap_id"].values)
    gmap_id_filter = partial(filter_by_gmap_id, gmap_ids=gmap_ids)
    filter_raw_review_data([gmap_id_filter])

if not os.path.exists("./datasets/processed/users.csv"):
    print("Start processing user data")
    reviews = pd.read_csv("./datasets/raw/cafe_reviews.csv")
    min_num_reviews = 20
    extract_user_ids(reviews, min_num_reviews)

if not os.path.exists("./datasets/processed/reviews.csv"):
    print("Start filtering review data")
    reviews = pd.read_csv("./datasets/raw/cafe_reviews.csv")
    user_ids = pd.read_csv("./datasets/processed/users.csv")["user_id"].values
    filter_by_user_ids(reviews, user_ids)

Split dataset into train, validation, and test so that we can evaluate models with unseen data. However, due to the design of the model which relies on pre-defined list of user and cafes, we need to split randomly without stratifying based on users or cafes.

In [11]:
def split_reviews():
    file_name = "./datasets/processed/reviews.csv"
    reviews = pd.read_csv(file_name).sample(frac=1, random_state=42)

    valid_size = int(reviews.shape[0] * 0.1)
    test_size = int(reviews.shape[0] * 0.1)

    valid_reviews = reviews.iloc[:valid_size].reset_index(drop=True)
    test_reviews = reviews.iloc[valid_size: valid_size + test_size].reset_index(drop=True)
    train_reviews = reviews.iloc[valid_size + test_size:].reset_index(drop=True)

    print(f"train: {train_reviews.shape[0]} / valid: {valid_reviews.shape[0]} / test: {test_reviews.shape[0]}")

    os.makedirs("./datasets/splits", exist_ok=True)

    train_reviews.to_csv("./datasets/splits/train.csv", index=False)
    valid_reviews.to_csv("./datasets/splits/valid.csv", index=False)
    test_reviews.to_csv("./datasets/splits/test.csv", index=False)
In [12]:
if not os.path.exists("./datasets/splits/train.csv"):
    split_reviews()

EDA¶

Overview¶

Review Time¶

In [13]:
# One Hot Encoding for Unix Time Weekday
def unix_weekday_to_onehot(time):
    feature_weekday = [0]*7

    day = datetime.fromtimestamp(time / 1000, tz=timezone.utc).weekday()
    feature_weekday[day] = 1.

    return feature_weekday

# One Hot Encoding for Unix Time Hour
def unix_hour_to_onehot(time):
    feature_dayhour = [0]*24

    hr = datetime.fromtimestamp(time / 1000, tz=timezone.utc).hour
    feature_dayhour[hr] = 1.

    return feature_dayhour
In [14]:
reviews = pd.read_csv("./datasets/processed/reviews.csv")
reviews_time = reviews.copy()

# making sure the rating is numeric
reviews_time["rating"] = pd.to_numeric(reviews_time["rating"], errors="coerce")

# converting Unix ms to datetime
reviews_time["timestamp"] = pd.to_datetime(
    reviews_time["time"],
    unit="ms"
)

reviews_time["date"] = reviews_time["timestamp"].dt.date
reviews_time["month"] = reviews_time["timestamp"].dt.to_period("M").dt.to_timestamp()

# computing average rating per month and number of reviews per month
time_stats_all = (
    reviews_time
        .dropna(subset=["rating"])
        .groupby("month")
        .agg(
            avg_rating=("rating", "mean"),
            num_reviews=("rating", "count")
        )
        .reset_index()
)

min_reviews = 100
ts_global = time_stats_all[
    (time_stats_all["num_reviews"] >= min_reviews)
    & (time_stats_all["month"] >= "2008-01-01")
].sort_values("month")

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

ax = axes[0]
ax.plot(ts_global["month"], ts_global["avg_rating"], marker="o", linestyle="-")
ax.set_xlabel("Review month")
ax.set_ylabel("Average rating")
ax.set_title("Average review rating over time")
ax.tick_params(axis="x", rotation=45)
ax.set_ylim(3.0, 5.0)  # rating range 3–5

ax = axes[1]
ax.plot(ts_global["month"], ts_global["num_reviews"], marker="o", linestyle="-")
ax.set_xlabel("Review month")
ax.set_ylabel("# Reviews")
ax.set_title("Number of reviews per month")
ax.tick_params(axis="x", rotation=45)

fig.suptitle("Review Time", fontsize=14)
fig.tight_layout()
plt.show()
No description has been provided for this image

Review Period¶

In [15]:
# One Hot Encoding for Period
def unix_period_to_onehot(unix_ms):
    """
      0 -> before 2016
      1 -> 2016-2019
      2 -> 2020 and later
    """
    if pd.isna(unix_ms):
        return np.nan
    try:
        t = int(unix_ms)
    except (ValueError, TypeError):
        return np.nan

    b2016_ms = int(pd.Timestamp("2016-01-01").timestamp() * 1000)
    b2020_ms = int(pd.Timestamp("2020-01-01").timestamp() * 1000)

    if t < b2016_ms:
        return [0, 0, 0]
    elif t < b2020_ms:
        return [0, 1., 0]
    else:
        return [0, 0, 1.]
In [16]:
reviews_time = reviews.copy()
reviews_time["rating"] = pd.to_numeric(reviews_time["rating"], errors="coerce")
reviews_time["timestamp"] = pd.to_datetime(reviews_time["time"], unit="ms")
reviews_time["month"] = reviews_time["timestamp"].dt.to_period("M").dt.to_timestamp()

time_stats_all = (
    reviews_time
        .dropna(subset=["rating"])
        .groupby("month")
        .agg(
            avg_rating=("rating", "mean"),
            num_reviews=("rating", "count")
        )
        .reset_index()
)

min_reviews = 100

boundary_2016 = pd.Timestamp("2016-01-01")
boundary_2020 = pd.Timestamp("2020-01-01")

# Label function for plotting
def label_period(ts):
    if ts < boundary_2016:
        return "pre-2016"
    elif ts < boundary_2020:
        return "2016-2019"
    else:
        return "2020+"

time_stats_period = time_stats_all.copy()
time_stats_period["period"] = time_stats_period["month"].apply(label_period)

fig, axes = plt.subplots(1, 3, figsize=(18, 4))

period_order = ["pre-2016", "2016-2019", "2020+"]

for ax, label in zip(axes, period_order):
    ts = time_stats_period[time_stats_period["period"] == label].copy()
    ts = ts[ts["num_reviews"] >= min_reviews]

    if ts.empty:
        ax.set_visible(False)
        continue  # in case the early period has no data

    ts = ts.sort_values("month")
    ax.plot(ts["month"], ts["avg_rating"], marker="o", linestyle="-")
    ax.set_xlabel("Review month")
    ax.set_ylabel("Average rating")
    ax.set_title(f"Average review rating over time ({label})")
    ax.tick_params(axis="x", rotation=45)
    ax.set_ylim(3.0, 5.1)  # focusing on reasonable ratings

fig.suptitle("Review Periods", fontsize=14)
fig.tight_layout()
plt.show()
No description has been provided for this image

Chain¶

In [17]:
def get_chains_dict(cafes):
    names, counts = np.unique(cafes["name"], return_counts=True)
    indices = np.argsort(counts)[::-1]
    sorted_counts = counts[indices]
    sorted_names = names[indices]

    stems = defaultdict(int)
    for name in sorted_names:
        words = name.lower().strip().split()

        for i in range(len(words)):
            stems[" ".join(words[:(i+1)])] += 1

    chains = {}
    for name, count in zip(sorted_names, sorted_counts):
        if count > 5:
            chains[name] = 2
            continue

        words = name.lower().strip().split()
        stem_matches = []
        for i in range(len(words)):
            stem_matches.append(stems[" ".join(words[:(i+1)])])

        if len(stem_matches) == 1 or len(stem_matches) > 10:
            chains[name] = 0
            continue

        if len(stem_matches) >= 2 and sum(stem_matches[1:]) < 10:
            chains[name] = 0
            continue

        if len(stem_matches) >= 3 and sum(stem_matches[2:]) < 5:
            chains[name] = 0
            continue

        chains[name] = 1

    return chains
In [18]:
cafes = pd.read_csv("../datasets/processed/cafes.csv")

# Counting how many times each cafe name appears
name_counts = cafes["name"].value_counts()
cafes["chain_size"] = cafes["name"].map(name_counts)

CHAIN_SIZE_THRESHOLD = 10

def classify_chain(size):
    if pd.isna(size) or size <= 1:
        return "Non Chain"
    elif size < CHAIN_SIZE_THRESHOLD:
        return "Sub Chains"
    else:
        return "Chains"

cafes["chain_category"] = cafes["chain_size"].apply(classify_chain)

# Merge with reviews to get ratings for each category
df = reviews.merge(
    cafes[["gmap_id", "chain_category"]],
    on="gmap_id",
    how="left"
)
df["rating"] = pd.to_numeric(df["rating"], errors="coerce")

chain_avg = (
    df.dropna(subset=["rating", "chain_category"])
      .groupby("chain_category")["rating"]
      .mean()
      .reindex(["Non Chain", "Sub Chains", "Chains"])
)

fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(chain_avg.index, chain_avg.values)
ax.set_xlabel("Chain category")
ax.set_ylabel("Average review rating")
ax.set_title("Average Rating of Cafes by Non Chains, Sub Chains, and Chains")
plt.tight_layout()
plt.show()
No description has been provided for this image

Price¶

In [19]:
# One Hot Encoding for Price
def price_to_onehot(price):
    feature_price = [0]*4
    if price is not np.nan:
        feature_price[len(price)-1] += 1.
    return feature_price
In [20]:
cafes = pd.read_csv("../datasets/processed/cafes.csv")

def price_to_num(p):
    if pd.isna(p):
        return np.nan
    p = str(p).strip()
    if p == "" or p.lower() == "none":
        return np.nan
    n = p.count("$")
    return n if n > 0 else np.nan

cafes["price_num"] = cafes["price"].apply(price_to_num)

df = reviews.merge(
    cafes[["gmap_id", "name", "latitude", "longitude", "price_num", "avg_rating"]],
    on="gmap_id",
    how="left"
)
df["rating"] = pd.to_numeric(df["rating"], errors="coerce")

# Price vs rating - data prep
cafe_avg_price = df.groupby("gmap_id").agg(
    avg_user_rating=("rating", "mean"),
    price_num=("price_num", "first")
).dropna()

levels = sorted(cafe_avg_price["price_num"].unique())
means = cafe_avg_price.groupby("price_num")["avg_user_rating"].mean()

fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Price vs rating
ax = axes[0]
ax.boxplot(
    [cafe_avg_price[cafe_avg_price["price_num"] == k]["avg_user_rating"] for k in levels],
    labels=[int(k) for k in levels]
)
ax.set_xlabel("Price level (# of $)")
ax.set_ylabel("Average review rating")
ax.set_title("Price vs rating (cafe-level)")

# Mean rating by price level
ax = axes[1]
ax.bar(means.index.astype(int), means.values)
ax.set_xlabel("Price level (# of $)")
ax.set_ylabel("Mean rating")
ax.set_title("Mean rating by price level")

fig.suptitle("Price", fontsize=14)
fig.tight_layout()
plt.show()
/var/folders/d9/8g_dsxvn7vngpll11tdl25j80000gq/T/ipykernel_37447/4126261889.py:34: MatplotlibDeprecationWarning: The 'labels' parameter of boxplot() has been renamed 'tick_labels' since Matplotlib 3.9; support for the old name will be dropped in 3.11.
  ax.boxplot(
No description has been provided for this image

Open Hours¶

In [21]:
# Converts time text to values
def parse_time(t):
    t = t.strip().upper()

    # Match hh or hh:mm formats
    m = re.match(r"(\d{1,2})(?::(\d{2}))?(AM|PM)", t)
    if not m:
        raise ValueError(f"Invalid time format: {t}")

    hour = int(m.group(1))
    minute = int(m.group(2) or 0)
    period = m.group(3)

    # Convert to 24-hour
    if period == "AM":
        if hour == 12:
            hour = 0
    else:  # PM
        if hour != 12:
            hour += 12

    return hour + minute / 60.0

# One Hot Encoding for Open Hours
def hours_to_onehot(hour_str):
    if hour_str is None or hour_str is np.nan:
        return [0,0,0]
    before_noon = 0
    after_noon = 0
    hours = ast.literal_eval(hour_str)

    for entry in hours:
        if entry[1] == "Open 24 hours":
            return [1.,0,0]
        if entry[1] == "Closed":
            continue

        open_str, close_str = entry[1].split("–")
        try:
            start_hr = int(np.floor(parse_time(open_str)))
        except ValueError:
            return [0,0,0]
        if start_hr < 13:
            before_noon += 1.
        else: after_noon += 1.

    if before_noon > after_noon:
        return [0,1.,0]
    return [0,0,1.]

def open_hours_category(hour_str):
    onehot = hours_to_onehot(hour_str)
    if onehot == [1., 0, 0]:
        return "24 hours"
    elif onehot == [0, 1., 0]:
        return "Opens before noon"
    elif onehot == [0, 0, 1.]:
        return "Opens after noon"
    else:
        return "Unknown/No hours"
In [22]:
cafes["open_hours_category"] = cafes["hours"].apply(open_hours_category)

# Merging with reviews to get ratings per category
df = reviews.merge(
    cafes[["gmap_id", "open_hours_category"]],
    on="gmap_id",
    how="left"
)
df["rating"] = pd.to_numeric(df["rating"], errors="coerce")

open_hours_avg = (
    df.dropna(subset=["rating", "open_hours_category"])
      .groupby("open_hours_category")["rating"]
      .mean()
      .reindex(["24 hours", "Opens before noon", "Opens after noon", "Unknown/No hours"])
)


fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(open_hours_avg.index.tolist(), open_hours_avg.values.tolist())
ax.set_xlabel("Open-hours category")
ax.set_ylabel("Average review rating")
ax.set_title("Average Rating by Open-hours Category")
plt.xticks(rotation=15)
plt.tight_layout()
plt.show()
No description has been provided for this image

Location¶

In [23]:
@lru_cache(maxsize=1)
def get_counties_ca():
    counties = gpd.read_file("resources/cb_2018_us_county_500k.shp")
    counties_ca = counties[counties["STATEFP"] == "06"]  # California only
    counties_ca = counties_ca.sort_values("NAME").reset_index(drop=True)
    counties_ca["COUNTY_NUM"] = counties_ca.index
    counties_ca = counties_ca.set_geometry("geometry")
    _ = counties_ca.sindex
    return counties_ca

def get_county(lat, lon):
    counties_ca = get_counties_ca()
    point = Point(lon, lat)  # geometry expects (lon, lat)
    idx = list(counties_ca.sindex.intersection(point.bounds))
    if not idx:
        return None
    candidates = counties_ca.iloc[idx]
    matches = candidates[candidates.contains(point)]
    return int(matches.iloc[0]["COUNTY_NUM"]) if len(matches) else None

def location_to_onehot(location):
    feature_county =[0]*58  # 58 counties in Cali

    if location is not None:
        feature_county[location] = 1.

    return feature_county
In [24]:
# Folium Visualization:

cafes_map = cafes.dropna(subset=["latitude","longitude"]).copy()
cafes_map["avg_rating"] = pd.to_numeric(cafes_map["avg_rating"], errors="coerce")

# base map
map = folium.Map(
    location=[36.5, -119.5],
    zoom_start=6
)

# california outline from github
ca_geojson_url = "https://raw.githubusercontent.com/glynnbird/usstatesgeojson/master/california.geojson"

folium.GeoJson(
    ca_geojson_url,
    name="California outline",
    style_function=lambda feature: {
        "fillColor": "#ffffff",
        "color": "black",
        "weight": 3,
        "fillOpacity": 0.05
    }
).add_to(map)

# california counties from github
counties_url = "https://raw.githubusercontent.com/codeforamerica/click_that_hood/master/public/data/california-counties.geojson"
counties_geo = requests.get(counties_url).json()

# converting counties to polygons for averages
county_polys = []
for feat in counties_geo["features"]:
    county_name = feat["properties"]["name"]
    poly = prep(shape(feat["geometry"]))
    county_polys.append((county_name, poly))

# assigning county to each cafe based on lat/long
def find_county(lat, long):
    pt = Point(long, lat)
    for cname, poly in county_polys:
        if poly.contains(pt):
            return cname
    return np.nan

cafes_map["county"] = cafes_map.apply(
    lambda r: find_county(r["latitude"], r["longitude"]),
    axis=1
)

# getting average rating per county
county_stats = (
    cafes_map.dropna(subset=["county"]) # removes cafes without county label so they aren't computed in average
             .groupby("county")
             .agg(avg_rating=("avg_rating", "mean"))
             .reset_index()
)

# choropleth coloring counties by average rating
folium.Choropleth(
    geo_data=counties_geo,
    name="Average rating per county",
    data=county_stats,
    columns=["county", "avg_rating"],
    key_on="feature.properties.name",
    fill_color="YlGnBu",
    fill_opacity=0.7,
    line_opacity=0.3,
    nan_fill_color="white",
    legend_name="Average cafe rating"
).add_to(map)

# cafe markers
cluster = MarkerCluster(name="Cafe markers").add_to(map)

for _, r in cafes_map.iterrows():
    popup = (
        f"{r.get('name','')}"
        f"<br>Rating: {r.get('avg_rating',np.nan)}"
        f"<br>Price: {r.get('price','')}"
        f"<br>County: {r.get('county','')}"
    )
    folium.CircleMarker(
        location=[r["latitude"], r["longitude"]],
        radius=2.5,
        color="black",
        weight=0.5,
        fill=True,
        fill_opacity=0.7,
        popup=popup
    ).add_to(cluster)

# Toggle panel so we can show/hide features
folium.LayerControl(collapsed=False).add_to(map)
map
Out[24]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Modeling¶

Feature Matrix Consturction¶

We construct dictionaries converting the following:

  • user_id to index of onehot vector for user.
  • gmap_id to index of onehot vector for cafe.
  • price to onehot vector for cafe price.
  • hours to onehot vector for whether a cafe is open all day, morning, or evening.
  • latitude and longitude to index of onehot vector for counties in California.
  • chains to onehot vector for size of chain (none, small, large).
  • prev to index of onehot vector for cafe that user previously rated.
In [25]:
def preprocess_data_latent(feat_names):
    reviews = pd.read_csv("./datasets/processed/reviews.csv")
    cafes = pd.read_csv("./datasets/processed/cafes.csv")

    feat_dicts = {}
    for name in feat_names:
        if name == "user":
            unique_user_ids = np.sort(np.unique(reviews["user_id"].values))
            user2index = {user_id: index for index, user_id in enumerate(unique_user_ids)}
            feat_dicts[name] = user2index

        elif name == "cafe":
            unique_gmap_ids = np.sort(np.unique(cafes["gmap_id"]))
            cafe2index = {gmap_id: index for index, gmap_id in enumerate(unique_gmap_ids)}
            feat_dicts[name] = cafe2index

        elif name == "price":
            unique_gmap_ids, indices = np.unique(cafes["gmap_id"], return_index=True)
            order = np.argsort(unique_gmap_ids)
            unique_gmap_ids = unique_gmap_ids[order]
            indices = indices[order]
            cafe2price = {gmap_id: cafes["price"][index] for gmap_id, index in zip(unique_gmap_ids, indices)}
            feat_dicts[name] = cafe2price

        elif name == "open_hours":
            unique_gmap_ids, indices = np.unique(cafes["gmap_id"], return_index=True)
            order = np.argsort(unique_gmap_ids)
            unique_gmap_ids = unique_gmap_ids[order]
            indices = indices[order]
            cafe2hours = {gmap_id: cafes["hours"][index] for gmap_id, index in zip(unique_gmap_ids, indices)}
            feat_dicts[name] = cafe2hours

        elif name == "location":
            unique_gmap_ids, indices = np.unique(cafes["gmap_id"], return_index=True)
            order = np.argsort(unique_gmap_ids)
            unique_gmap_ids = unique_gmap_ids[order]
            indices = indices[order]
            cafe2location = {gmap_id: get_county(cafes["latitude"][index],cafes["longitude"][index]) for gmap_id, index in zip(unique_gmap_ids, indices)}
            feat_dicts[name] = cafe2location

        elif name == "chains":
            unique_gmap_ids, indices = np.unique(cafes["gmap_id"], return_index=True)
            order = np.argsort(unique_gmap_ids)
            unique_gmap_ids = unique_gmap_ids[order]
            indices = indices[order]

            chains = get_chains_dict(cafes)
            cafe2chain = {gmap_id: chains[cafes["name"][index]] for gmap_id, index in zip(unique_gmap_ids, indices)}
            feat_dicts[name] = cafe2chain

        elif name == "prev":
            reviews_sorted = reviews.sort_values(by=['user_id', 'time'])

            user_interactions = (
                reviews_sorted.groupby('user_id')['gmap_id']
                .apply(list)
                .to_dict()
            )

            feat_dicts[name] = user_interactions

    avg_rating = reviews["rating"].mean()

    return feat_dicts, avg_rating

Then, we construct PyTorch Dataset. This class is desgined to be flexible about which features that are used. It receives a list of feature names in feat_names that will be used in the model and uses feat_dicts to map data to onehot vector. The following features that can be used are:

  • alpha is a bias term and we initialize it with global average of rating.
  • user is a user of a review.
  • cafe is a cafe of a review.
  • weekday is a weekday of a week when a review was posted.
  • hour is an hour of a day when a review was posted.
  • price is the price of the cafe (listed as either $, $$, $$$, or $$$$).
  • open_hours is the open hours of the cafe.
  • location is the location (county) of the cafe.
  • chains is whether a cafe is a large or small chain - or not a chain at all.
  • prev is the previous cafe the user review.
In [26]:
class CafeDatasetLatent(Dataset):
    def __init__(self, mode, feat_names, feat_dicts):
        self.reviews = pd.read_csv(f"./datasets/splits/{mode}.csv").values

        self.feat_names = feat_names
        self.feat_dicts = feat_dicts

    def get_feat_sizes(self):
        feat_sizes = {}

        for name in self.feat_names:
            if name == "alpha":
                feat_sizes[name] = 1

            elif name == "user":
                feat_sizes[name] = len(self.feat_dicts[name].keys())

            elif name == "cafe":
                feat_sizes[name] = len(self.feat_dicts[name].keys())

            elif name == "weekday":
                feat_sizes[name] = 7

            elif name == "hour":
                feat_sizes[name] = 24

            elif name == "price":
                feat_sizes[name] = 4

            elif name == "open_hours":
                feat_sizes[name] = 3

            elif name == "location":
                feat_sizes[name] = 58

            elif name == "chains":
                feat_sizes[name] = 3

            elif name == "period":
                feat_sizes[name] = 3

            elif name == "prev":
                feat_sizes[name] = len(self.feat_dicts["cafe"].keys())

            else:
                raise NotImplementedError

        return feat_sizes

    def __len__(self):
        return self.reviews.shape[0]

    def __getitem__(self, index):
        review = self.reviews[index]
        feats = []
        for name in self.feat_names:
            if name == "alpha":
                feat = torch.ones(1)
                feats.append(feat)

            elif name == "user":
                feat_dict = self.feat_dicts[name]
                feat = torch.zeros(len(feat_dict.keys()))
                feat[feat_dict[review[1]]] = 1.
                feats.append(feat)

            elif name == "cafe":
                feat_dict = self.feat_dicts[name]
                feat = torch.zeros(len(feat_dict.keys()))
                feat[feat_dict[review[0]]] = 1.
                feats.append(feat)

            elif name == "weekday":
                feat = torch.tensor(unix_weekday_to_onehot(int(review[3])))
                feats.append(feat)

            elif name == "hour":
                feat = torch.tensor(unix_hour_to_onehot(int(review[3])))
                feats.append(feat)

            elif name == "price":
                feat_dict = self.feat_dicts[name]
                feat = torch.tensor(price_to_onehot(feat_dict[review[0]]))
                feats.append(feat)

            elif name == "open_hours":
                feat_dict = self.feat_dicts[name]
                feat = torch.tensor(hours_to_onehot(feat_dict[review[0]]))
                feats.append(feat)

            elif name == "location":
                feat_dict = self.feat_dicts[name]
                feat = torch.tensor(location_to_onehot(feat_dict[review[0]]))
                feats.append(feat)

            elif name == "chains":
                feat_dict = self.feat_dicts[name]
                feat = torch.zeros(3)
                feat[feat_dict[review[0]]] = 1.
                feats.append(feat)

            elif name == "period":
                feat = torch.tensor(unix_period_to_onehot(int(review[3])))
                feats.append(feat)

            elif name == "prev":
                cafe_feat_dict = self.feat_dicts['cafe']    # All cafes
                feat_dict = self.feat_dicts[name]           # List of user -> list of all cafes user rated
                feat = torch.zeros(len(cafe_feat_dict.keys()))
                user = review[1]
                current_item = review[0]
                i = feat_dict[user].index(current_item)     # Get index of current cafe in user list
                if i > 0:
                    prev_item = feat_dict[user][i-1]
                    feat[cafe_feat_dict[prev_item]] = 1.
                feats.append(feat)

            else:
                raise NotImplementedError

        rating = torch.tensor(review[4])

        return *feats, rating

Next, we construct a model for rating prediction. This class is also designed to be flexible and it receives following arguments:

  • name is a unique identifier of a model.
  • dim is a dimension of latent.
  • feat_sizes is a list of feature sizes which would be used to initialize weights.
  • latent_names is a list of latent feature names which would be used to initialize latents.
  • latent_pairs is a list of tuples which indicate pairs we would calculate dot product in between.
  • avg_rating is a global average rating which is used to initialize alpha.
  • share_latents is indicator used for user history model to share latents for current and previous cafes.

This model has two sets of parameters:

  • weights is a dictionary of weights for each feature and corresponds to betas in model equation.
  • latents is a dictionary of latents for feature we specified and corresponds to gammas in model equation.
In [27]:
class RatePredictorLatent(nn.Module):
    def __init__(self, name, dim, feat_sizes, latent_names, latent_pairs, avg_rating, share_latents=False):
        super().__init__()

        self.name = name

        self.feat_names = list(feat_sizes.keys())
        self.latent_names = latent_names
        self.latent_pairs = latent_pairs

        self.share_latents = share_latents

        weights = {}
        for name, feat_size in feat_sizes.items():
            if self.share_latents and name == "prev":
                continue

            if name == "alpha":
                weight = torch.tensor(avg_rating).unsqueeze(0)
            else:
                weight = torch.zeros(feat_size)
            weights[name] = nn.Parameter(weight, requires_grad=True)

        self.weights =  nn.ParameterDict(weights)

        latents = {}
        for name in latent_names:
            feat_size = feat_sizes[name]
            latent = torch.randn(feat_size, dim) / dim
            latents[name] = nn.Parameter(latent, requires_grad=True)

        self.latents = nn.ParameterDict(latents)

    def forward(self, feats):
        out = torch.zeros(feats["alpha"].size(0)).to(feats["alpha"].device)
        for name in self.feat_names:
            if self.share_latents and name == "prev":
                continue

            out += torch.einsum("bd,d->b", feats[name], self.weights[name])

        gammas = {}
        for name in self.latent_names:
            gammas[name] = torch.einsum("bd,di->bi", feats[name], self.latents[name])

        if self.share_latents:
            gammas["prev"] = torch.einsum("bd,di->bi", feats["prev"], self.latents["cafe"])

        for (latent_i, latent_j) in self.latent_pairs:
            out += torch.einsum("bi,bi->b", gammas[latent_i], gammas[latent_j])

        return out

Next, we construct a trainer to train a model by using autograd of PyTorch.

In [28]:
class RateTrainerLatent():
    def __init__(self, model, lamb_dict, lr, train_dataloader, valid_dataloader, device):
        self.model = model
        self.lamb_dict = lamb_dict
        self.train_dataloader = train_dataloader
        self.valid_dataloader = valid_dataloader
        self.device = device

        self.feat_names = model.feat_names
        self.latent_names = model.latent_names

        self.optim =  torch.optim.Adam(model.parameters(), lr=lr)

    def train(self, n_epochs):
        train_mses, valid_mses = [], []
        best_mse = float("inf")
        for i in range(n_epochs):
            train_mse = 0
            total = 0

            bar = tqdm.tqdm(self.train_dataloader, desc="Training Model")
            for feats in bar:
                ratings = feats[-1].to(self.device)
                assert len(self.feat_names) + 1 == len(feats)
                feats = {name: f.to(self.device) for name, f in zip(self.feat_names, feats[:-1])}

                self.optim.zero_grad()

                pred_ratings = self.model(feats)
                mse = self.mse(ratings, pred_ratings)
                mse_reg = mse + self.regularizer()

                mse_reg.backward()
                self.optim.step()

                batch_size = feats["alpha"].size(0)
                train_mse += mse.item() * batch_size
                total += batch_size

                bar.set_description(f"Training Model ({mse.item():.6f})")

            train_mse /= total
            valid_mse = self.validate()
            print(f"Step[{i + 1:2d}]: train {train_mse:2.6f} / valid {valid_mse:2.6f}")

            if valid_mse < best_mse:
                best_mse = valid_mse
                torch.save(self.model, f"./models/{self.model.name}.pt")

            train_mses.append(train_mse)
            valid_mses.append(valid_mse)

        return train_mses, valid_mses

    def validate(self):
        with torch.no_grad():
            total = 0
            mse = 0

            for feats in self.valid_dataloader:
                ratings = feats[-1].to(self.device)
                assert len(self.feat_names) + 1 == len(feats)
                feats = {name: f.to(self.device) for name, f in zip(self.feat_names, feats[:-1])}

                pred_ratings = self.model(feats)

                batch_size = feats["alpha"].size(0)
                mse += self.mse(ratings, pred_ratings).item() * batch_size
                total += batch_size

            return mse / total

    def mse(self, y_true, y_pred):
        return torch.mean((y_true - y_pred) ** 2)

    def regularizer(self):
        reg = 0
        for name in self.feat_names:
            if self.model.share_latents and name == "prev":
                continue

            reg += self.lamb_dict[name] * torch.mean(self.model.weights[name] ** 2)

        for name in self.latent_names:
            latents = self.model.latents[name]
            reg += self.lamb_dict[name] * latents.size(1) * torch.mean(latents ** 2)

        return reg

We record metrics in a file metrics.json

In [29]:
def update_metrics(name, train, valid):
    if os.path.exists("./metrics.json"):
        with open("./metrics.json", "r") as f:
           metrics = json.load(f)
    else:
        metrics = {}

    metrics[name] = {"metrics": {"train": train, "valid": valid}}

    with open("./metrics.json", "w") as f:
        json.dump(metrics, f)

After a series of experiments, we ended up using the following hyperparamters for all the experiments below:

  • n_epoch is the numer of iterations for training.
  • lr is a learning rate of gradient descent.
  • dim is a dimension of latents if used.
  • batch_size is a batch size of training.
  • device is a device we run models on. You can change "cpu" to "cuda" if you have GPU environment. However, we only tested on "cpu" so we cannot guarantee that it would work on "gpu".
In [30]:
n_epoch = 10
lr = 0.01
dim = 32
batch_size = 2048

device = torch.device("cpu")

Then we define train function which train a model given a parameter dictionary called param_dict which requires the following keys:

  • feat is a category of a model where we used "base", "chains", "price", "open_hours", "time", "period", and "prev". We add "_latent" if it uses latents.
  • feat_names is a list of features that model uses.
  • latent_names is a list of features that model computes latents for.
  • latent_pairs is a list of pairs of latent features which model computs dot product for.
  • lamb_dict is a dictionary which maps feature name to regularizer coefficient for each feature.
  • share_latents is a indicator if model share latents for current and previous cafe in user history model.
In [31]:
def train(param_dict):
    feat = param_dict["feat"]
    feat_names = param_dict["feat_names"]
    latent_names = param_dict["latent_names"]
    latent_pairs = param_dict["latent_pairs"]
    lamb_dict = param_dict["lamb_dict"]
    share_latents = param_dict.get("share_latents", 0)

    lamb_str = "_".join([f"{name}-{value}" for name, value in lamb_dict.items()])
    name = f"{feat}_{lamb_str}"

    if not os.path.exists(f"./models/{name}.pt"):
        print(f"Start training {name}")

        feat_dicts, avg_rating = preprocess_data_latent(feat_names)
        train_dataset = CafeDatasetLatent("train", feat_names, feat_dicts)
        valid_dataset = CafeDatasetLatent("valid", feat_names, feat_dicts)

        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True)

        feat_sizes = train_dataset.get_feat_sizes()
        model = RatePredictorLatent(name, dim, feat_sizes, latent_names, latent_pairs, avg_rating, share_latents=share_latents)

        trainer = RateTrainerLatent(model, lamb_dict, lr, train_dataloader, valid_dataloader, device)

        os.makedirs("./models", exist_ok=True)
        train_mses, valid_mses = trainer.train(n_epoch)

        update_metrics(name, train_mses, valid_mses)

Running Models to Find Hyper-Parameters¶

In the following sections, we train models to test if adding specific feature would boost performance and which regularizer coefficient is best for each feature. This would take hours to run so this code won't run training. However, if you want to run training you can set a variable validation as True.

In [32]:
validation = False

We also load pre-calculated training histories.

In [33]:
def get_name(param_dict):
    feat = param_dict["feat"]
    lamb_dict = param_dict["lamb_dict"]

    lamb_str = "_".join([f"{name}-{value}" for name, value in lamb_dict.items()])
    name = f"{feat}_{lamb_str}"

    return name

def plot_results(params):
    if not os.path.exists("./metrics.json"):
        return None

    with open("./metrics.json", "r") as f:
        metrics = json.load(f)

    sns.set_theme(style="whitegrid", palette="viridis")
    _, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 5))

    best_mses, categories = [], []
    feat = ""
    for param_dict in params:
        feat = param_dict["feat"]
        name = get_name(param_dict)
        metric = metrics.get(name, None)
        valid_mses = metric["metrics"]["valid"]
        df = pd.DataFrame({"epochs": np.arange(1, n_epoch + 1), "valid": valid_mses})
        sns.lineplot(df, x="epochs", y="valid", label=name, ax=axes[0])

        best_mses.append(min(valid_mses))

        if "base" in feat:
            lamb_dict = param_dict["lamb_dict"]
            categories.append(f"user lamb {lamb_dict["user"]} \ncafe lamb {lamb_dict["cafe"]}")

        elif "time" in feat:
            lamb_dict = param_dict["lamb_dict"]
            categories.append(f"weekday lamb {lamb_dict["weekday"]} \nhour lamb {lamb_dict["hour"]}")

        else:
            lamb_dict = param_dict["lamb_dict"]
            name = "_".join([l for l in feat.split("_") if l != "latent" and l != "all"])
            categories.append(f"{name} lamb {lamb_dict[name]}")



    axes[0].set_xlabel("Epochs")
    axes[0].set_ylabel("Validation MSE")
    axes[0].legend()

    df = pd.DataFrame({"category": categories, "mse": best_mses})
    sns.barplot(df, y="category", x="mse", ax=axes[1])
    axes[1].set_ylabel("Models")
    axes[1].set_xlabel("Best Validation MSE")

    if "base" in feat:
        axes[1].set_xlim(0.67, 0.77)

    else:
        axes[1].set_xlim(0.67, 0.72)

    plt.tight_layout()
    plt.show()
In [34]:
best_params = {}

Plotting Results¶

We plot results to compare performance on validation set and see training history.

Base without Latents¶

We train a naive model where we only use features of user_id and cafe without latents.

In [35]:
params = [
    {
        "feat": "base",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": [],
        "latent_pairs": [],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1},
        "test": 1
    },
    {
        "feat": "base",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": [],
        "latent_pairs": [],
        "lamb_dict": {"alpha": 0, "user": 1, "cafe": 0.1},
        "test": 0
    },
    {
        "feat": "base",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": [],
        "latent_pairs": [],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 0.1},
        "test": 0
    },
    {
        "feat": "base",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": [],
        "latent_pairs": [],
        "lamb_dict": {"alpha": 0, "user": 1, "cafe": 1},
        "test": 0
    }
]
In [36]:
if validation:
    for param_dict in params:
        train(param_dict)
In [37]:
plot_results(params)
No description has been provided for this image
In [38]:
best_params["base"] = params[2]

Base with Latents (User x Cafe)¶

Next, we train a naive model with latents for user and cafe.

In [39]:
params = [
    {
        "feat": "base_latent",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1},
        "test": 1
    },
    {
        "feat": "base_latent",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 1, "cafe": 0.1},
        "test": 0
    },
    {
        "feat": "base_latent",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 0.1},
        "test": 0
    },
    {
        "feat": "base_latent",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 1, "cafe": 1},
        "test": 0
    }
]
In [40]:
if validation:
    for param_dict in params:
        train(param_dict)
In [41]:
plot_results(params)
No description has been provided for this image
In [42]:
best_params["base_latent"] = params[0]

Review Time without Latents¶

In [43]:
params = [
    {
        "feat": "time",
        "feat_names": ["alpha", "user", "cafe", "weekday", "hour"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "weekday": 1, "hour": 1},
        "test": 0
    },
    {
        "feat": "time",
        "feat_names": ["alpha", "user", "cafe", "weekday", "hour"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "weekday": 0.1, "hour": 0.1},
        "test": 0
    },
    {
        "feat": "time",
        "feat_names": ["alpha", "user", "cafe", "weekday", "hour"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "weekday": 0.01, "hour": 0.01},
        "test": 0
    }
]
In [44]:
if validation:
    for param_dict in params:
        train(param_dict)
In [45]:
plot_results(params)
No description has been provided for this image
In [46]:
best_params["time"] = params[1]

Review Time with Latents (User x Time)¶

In [47]:
params = [
    {
        "feat": "time_latent",
        "feat_names": ["alpha", "user", "cafe", "weekday", "hour"],
        "latent_names": ["user", "cafe", "weekday", "hour"],
        "latent_pairs": [["user", "cafe"], ["user", "weekday"], ["user", "hour"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "weekday": 1, "hour": 1},
        "test": 0
    },
    {
        "feat": "time_latent",
        "feat_names": ["alpha", "user", "cafe", "weekday", "hour"],
        "latent_names": ["user", "cafe", "weekday", "hour"],
        "latent_pairs": [["user", "cafe"], ["user", "weekday"], ["user", "hour"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "weekday": 0.1, "hour": 0.1},
        "test": 0
    },
    {
        "feat": "time_latent",
        "feat_names": ["alpha", "user", "cafe", "weekday", "hour"],
        "latent_names": ["user", "cafe", "weekday", "hour"],
        "latent_pairs": [["user", "cafe"], ["user", "weekday"], ["user", "hour"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "weekday": 0.01, "hour": 0.01},
        "test": 0
    }
]
In [48]:
if validation:
    for param_dict in params:
        train(param_dict)
In [49]:
plot_results(params)
No description has been provided for this image
In [50]:
best_params["time_latent"] = params[0]

Review Time with Latents (User x Time and Cafe x Time)¶

In [51]:
params = [
    {
        "feat": "time_all_latent",
        "feat_names": ["alpha", "user", "cafe", "weekday", "hour"],
        "latent_names": ["user", "cafe", "weekday", "hour"],
        "latent_pairs": [["user", "cafe"], ["user", "weekday"], ["user", "hour"], ["cafe", "weekday"], ["cafe", "hour"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "weekday": 1, "hour": 1},
        "test": 0
    },
    {
        "feat": "time_all_latent",
        "feat_names": ["alpha", "user", "cafe", "weekday", "hour"],
        "latent_names": ["user", "cafe", "weekday", "hour"],
        "latent_pairs": [["user", "cafe"], ["user", "weekday"], ["user", "hour"], ["cafe", "weekday"], ["cafe", "hour"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "weekday": 0.1, "hour": 0.1},
        "test": 0
    },
    {
        "feat": "time_all_latent",
        "feat_names": ["alpha", "user", "cafe", "weekday", "hour"],
        "latent_names": ["user", "cafe", "weekday", "hour"],
        "latent_pairs": [["user", "cafe"], ["user", "weekday"], ["user", "hour"], ["cafe", "weekday"], ["cafe", "hour"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "weekday": 0.01, "hour": 0.01},
        "test": 0
    }
]
In [52]:
if validation:
    for param_dict in params:
        train(param_dict)
In [53]:
plot_results(params)
No description has been provided for this image
In [54]:
best_params["time_all_latent"] = params[0]

Review Period without Latents¶

In [55]:
params = [
    {
        "feat": "period",
        "feat_names": ["alpha", "user", "cafe", "period"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "period": 0.01},
        "test": 0
    },
    {
        "feat": "period",
        "feat_names": ["alpha", "user", "cafe", "period"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "period": 0.1},
        "test": 0
    },
    {
        "feat": "period",
        "feat_names": ["alpha", "user", "cafe", "period"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "period": 1},
        "test": 0
    }
]
In [56]:
if validation:
    for param_dict in params:
        train(param_dict)
In [57]:
plot_results(params)
No description has been provided for this image
In [58]:
best_params["period"] = params[2]

Review Period with Latents (User x Period)¶

In [59]:
params = [
    {
        "feat": "period_latent",
        "feat_names": ["alpha", "user", "cafe", "period"],
        "latent_names": ["user", "cafe", "period"],
        "latent_pairs": [["user", "cafe"], ["user", "period"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "period": 0.01},
        "test": 0
    },
    {
        "feat": "period_latent",
        "feat_names": ["alpha", "user", "cafe", "period"],
        "latent_names": ["user", "cafe", "period"],
        "latent_pairs": [["user", "cafe"], ["user", "period"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "period": 0.1},
        "test": 0
    },
    {
        "feat": "period_latent",
        "feat_names": ["alpha", "user", "cafe", "period"],
        "latent_names": ["user", "cafe", "period"],
        "latent_pairs": [["user", "cafe"], ["user", "period"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "period": 1},
        "test": 0
    }
]
In [60]:
if validation:
    for param_dict in params:
        train(param_dict)
In [61]:
plot_results(params)
No description has been provided for this image
In [62]:
best_params["period_latent"] = params[1]

Review Period with Latents (User x Period and Cafe x Period)¶

In [63]:
params = [
    {
        "feat": "period_all_latent",
        "feat_names": ["alpha", "user", "cafe", "period"],
        "latent_names": ["user", "cafe", "period"],
        "latent_pairs": [["user", "cafe"], ["user", "period"], ["cafe", "period"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "period": 0.01},
        "test": 0
    },
    {
        "feat": "period_all_latent",
        "feat_names": ["alpha", "user", "cafe", "period"],
        "latent_names": ["user", "cafe", "period"],
        "latent_pairs": [["user", "cafe"], ["user", "period"], ["cafe", "period"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "period": 0.1},
        "test": 0
    },
    {
        "feat": "period_all_latent",
        "feat_names": ["alpha", "user", "cafe", "period"],
        "latent_names": ["user", "cafe", "period"],
        "latent_pairs": [["user", "cafe"], ["user", "period"], ["cafe", "period"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "period": 1},
        "test": 0
    }
]
In [64]:
if validation:
    for param_dict in params:
        train(param_dict)
In [65]:
plot_results(params)
No description has been provided for this image
In [66]:
best_params["period_all_latent"] = params[1]

Chains without Latents¶

In [67]:
params = [
    {
        "feat": "chains",
        "feat_names": ["alpha", "user", "cafe", "chains"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "chains": 0.01},
        "test": 0
    },
    {
        "feat": "chains",
        "feat_names": ["alpha", "user", "cafe", "chains"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "chains": 0.1},
        "test": 0
    },
    {
        "feat": "chains",
        "feat_names": ["alpha", "user", "cafe", "chains"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "chains": 1},
        "test": 0
    }
]
In [68]:
if validation:
    for param_dict in params:
        train(param_dict)
In [69]:
plot_results(params)
No description has been provided for this image
In [70]:
best_params["chains"] = params[1]

Chains with Latents (User x Chain)¶

In [71]:
params = [
    {
        "feat": "chains_latent",
        "feat_names": ["alpha", "user", "cafe", "chains"],
        "latent_names": ["user", "cafe", "chains"],
        "latent_pairs": [["user", "cafe"], ["user", "chains"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "chains": 0.01},
        "test": 0
    },
    {
        "feat": "chains_latent",
        "feat_names": ["alpha", "user", "cafe", "chains"],
        "latent_names": ["user", "cafe", "chains"],
        "latent_pairs": [["user", "cafe"], ["user", "chains"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "chains": 0.1},
        "test": 0
    },
    {
        "feat": "chains_latent",
        "feat_names": ["alpha", "user", "cafe", "chains"],
        "latent_names": ["user", "cafe", "chains"],
        "latent_pairs": [["user", "cafe"], ["user", "chains"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "chains": 1},
        "test": 0
    }
]
In [72]:
if validation:
    for param_dict in params:
        train(param_dict)
In [73]:
plot_results(params)
No description has been provided for this image
In [74]:
best_params["chains_latent"] = params[1]

Price without Latents¶

In [75]:
params = [
    {
        "feat": "price",
        "feat_names": ["alpha", "user", "cafe", "price"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "price": 0.01},
        "test": 0
    },
    {
        "feat": "price",
        "feat_names": ["alpha", "user", "cafe", "price"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "price": 0.1},
        "test": 0
    },
    {
        "feat": "price",
        "feat_names": ["alpha", "user", "cafe", "price"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "price": 1},
        "test": 0
    }
]
In [76]:
if validation:
    for param_dict in params:
        train(param_dict)
In [77]:
plot_results(params)
No description has been provided for this image
In [78]:
best_params["price"] = params[0]

Price with Latents (User x Price)¶

In [79]:
params = [
    {
        "feat": "price_latent",
        "feat_names": ["alpha", "user", "cafe", "price"],
        "latent_names": ["user", "cafe", "price"],
        "latent_pairs": [["user", "cafe"], ["user", "price"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "price": 0.01},
        "test": 0
    },
    {
        "feat": "price_latent",
        "feat_names": ["alpha", "user", "cafe", "price"],
        "latent_names": ["user", "cafe", "price"],
        "latent_pairs": [["user", "cafe"], ["user", "price"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "price": 0.1},
        "test": 0
    },
    {
        "feat": "price_latent",
        "feat_names": ["alpha", "user", "cafe", "price"],
        "latent_names": ["user", "cafe", "price"],
        "latent_pairs": [["user", "cafe"], ["user", "price"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "price": 1},
        "test": 0
    }
]
In [80]:
if validation:
    for param_dict in params:
        train(param_dict)
In [81]:
plot_results(params)
No description has been provided for this image
In [82]:
best_params["price_latent"] = params[1]

Open Hours without Latents¶

In [83]:
params = [
    {
        "feat": "open_hours",
        "feat_names": ["alpha", "user", "cafe", "open_hours"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "open_hours": 0.01},
        "test": 0
    },
    {
        "feat": "open_hours",
        "feat_names": ["alpha", "user", "cafe", "open_hours"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "open_hours": 0.1},
        "test": 0
    },
    {
        "feat": "open_hours",
        "feat_names": ["alpha", "user", "cafe", "open_hours"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "open_hours": 1},
        "test": 0
    }
]
In [84]:
if validation:
    for param_dict in params:
        train(param_dict)
In [85]:
plot_results(params)
No description has been provided for this image
In [86]:
best_params["open_hours"] = params[1]

Open Hours with Latents (User x Open Hours)¶

In [87]:
params = [
    {
        "feat": "open_hours_latent",
        "feat_names": ["alpha", "user", "cafe", "open_hours"],
        "latent_names": ["user", "cafe", "open_hours"],
        "latent_pairs": [["user", "cafe"], ["user", "open_hours"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "open_hours": 0.01},
        "test": 0
    },
    {
        "feat": "open_hours_latent",
        "feat_names": ["alpha", "user", "cafe", "open_hours"],
        "latent_names": ["user", "cafe", "open_hours"],
        "latent_pairs": [["user", "cafe"], ["user", "open_hours"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "open_hours": 0.1},
        "test": 0
    },
    {
        "feat": "open_hours_latent",
        "feat_names": ["alpha", "user", "cafe", "open_hours"],
        "latent_names": ["user", "cafe", "open_hours"],
        "latent_pairs": [["user", "cafe"], ["user", "open_hours"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "open_hours": 1},
        "test": 0
    }
]
In [88]:
if validation:
    for param_dict in params:
        train(param_dict)
In [89]:
plot_results(params)
No description has been provided for this image
In [90]:
best_params["open_hours_latent"] = params[1]

Location without Latents¶

In [91]:
params = [
    {
        "feat": "location",
        "feat_names": ["alpha", "user", "cafe", "location"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "location": 1},
        "test": 0
    },
    {
        "feat": "location",
        "feat_names": ["alpha", "user", "cafe", "location"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "location": 0.1},
        "test": 0
    },
    {
        "feat": "location",
        "feat_names": ["alpha", "user", "cafe", "location"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "location": 0.01},
        "test": 0
    }
]
In [92]:
if validation:
    for param_dict in params:
        train(param_dict)
In [93]:
plot_results(params)
No description has been provided for this image
In [94]:
best_params["location"] = params[0]

Location with Latents (User x Location)¶

In [95]:
params = [
    {
        "feat": "location_latent",
        "feat_names": ["alpha", "user", "cafe", "location"],
        "latent_names": ["user", "cafe", "location"],
        "latent_pairs": [["user", "cafe"], ["user", "location"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "location": 1},
        "test": 0
    },
    {
        "feat": "location_latent",
        "feat_names": ["alpha", "user", "cafe", "location"],
        "latent_names": ["user", "cafe", "location"],
        "latent_pairs": [["user", "cafe"], ["user", "location"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "location": 0.1},
        "test": 0
    },
    {
        "feat": "location_latent",
        "feat_names": ["alpha", "user", "cafe", "location"],
        "latent_names": ["user", "cafe", "location"],
        "latent_pairs": [["user", "cafe"], ["user", "location"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "location": 0.01},
        "test": 0
    }
]
In [96]:
if validation:
    for param_dict in params:
        train(param_dict)
In [97]:
plot_results(params)
No description has been provided for this image
In [98]:
best_params["location_latent"] = params[0]

Previous Cafe without Shared Latents (Cafe x Prev Cafe)¶

In [99]:
params = [
    {
        "feat": "prev_latent",
        "feat_names": ["alpha", "user", "cafe", "prev"],
        "latent_names": ["user", "cafe", "prev"],
        "latent_pairs": [["user", "cafe"], ["cafe", "prev"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "prev": 0.01},
        "test": 0
    },
    {
        "feat": "prev_latent",
        "feat_names": ["alpha", "user", "cafe", "prev"],
        "latent_names": ["user", "cafe", "prev"],
        "latent_pairs": [["user", "cafe"], ["cafe", "prev"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "prev": 0.1},
        "test": 0
    },
    {
        "feat": "prev_latent",
        "feat_names": ["alpha", "user", "cafe", "prev"],
        "latent_names": ["user", "cafe", "prev"],
        "latent_pairs": [["user", "cafe"], ["cafe", "prev"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "prev": 1},
        "test": 0
    },
    {
        "feat": "prev_latent",
        "feat_names": ["alpha", "user", "cafe", "prev"],
        "latent_names": ["user", "cafe", "prev"],
        "latent_pairs": [["user", "cafe"], ["cafe", "prev"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1, "prev": 2},
        "test": 0
    }
]
In [100]:
if validation:
    for param_dict in params:
        train(param_dict)
In [101]:
plot_results(params)
No description has been provided for this image
In [102]:
best_params["prev_latent"] = params[3]

Previous Cafe with Shared Latents (Cafe x Prev Cafe)¶

In [103]:
params = [
    {
        "feat": "prev_share_latent",
        "feat_names": ["alpha", "user", "cafe", "prev"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"], ["cafe", "prev"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1},
        "share_latents": 1,
        "test": 0
    }
]
In [104]:
if validation:
    for param_dict in params:
        train(param_dict)
In [105]:
best_params["prev_share_latent"] = params[0]

Comparisons¶

In [106]:
def plot_validation_comparisons(params):
    if not os.path.exists("./metrics.json"):
        return None

    with open("./metrics.json", "r") as f:
        metrics = json.load(f)

    categories, mses = [], []
    for category, param_dict in params.items():
        name = get_name(param_dict)
        mse = min(metrics[name]["metrics"]["valid"])

        categories.append(category)
        mses.append(mse)

    df = pd.DataFrame({"category": categories, "mse": mses})
    baseline = df[df["category"] == "base_latent"]["mse"].values[0]
    df["baseline"] = df["mse"].apply(lambda x: x < baseline)

    plt.figure(figsize=(30, 5))
    sns.set_theme(style="whitegrid", palette="viridis")
    sns.barplot(df, x="category", y="mse", hue="baseline")

    plt.ylim(0.66, 0.70)
    plt.xlabel("Models with different features")
    plt.ylabel("Validation MSE")
    plt.title("Comparison of validation MSE for models with different features")
    plt.axhline(baseline, linestyle="--")
    plt.show()
In [107]:
plot_validation_comparisons(best_params)
No description has been provided for this image

Final Model¶

Based on validation results above, we ended up building a model as follows:

In [108]:
final_param_dict = {
    "feat": "final",
    "feat_names": [
        "alpha", "user", "cafe",
        "chains", "price", "open_hours", "location",
        "weekday", "hour", "period"
    ],
    "latent_names": ["user", "cafe", "chains", "price", "open_hours", "weekday", "hour", "period"],
    "latent_pairs": [
        ["user", "cafe"],
        ["user", "chains"],
        ["user", "price"],
        ["user", "open_hours"],
        ["user", "weekday"],
        ["user", "hour"],
        ["user", "period"],
        ["cafe", "period"]
    ],
    "lamb_dict": {
        "alpha": 0,
        "user": 0.1,
        "cafe": 1,
        "chains": 0.1,
        "price": 0.1,
        "open_hours": 0.1,
        "location": 1,
        "weekday": 1,
        "hour": 1,
        "period": 0.1
    },
    "test": 1
}
In [109]:
train(final_param_dict)

Baselines¶

We need two baselines for further analysis and testing.

In [110]:
baseline_params = [
    {
        "feat": "base",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": [],
        "latent_pairs": [],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1},
        "test": 1
    },
    {
        "feat": "base_latent",
        "feat_names": ["alpha", "user", "cafe"],
        "latent_names": ["user", "cafe"],
        "latent_pairs": [["user", "cafe"]],
        "lamb_dict": {"alpha": 0, "user": 0.1, "cafe": 1},
        "test": 1
    }
]
In [111]:
for baseline_param_dict in baseline_params:
    train(baseline_param_dict)

Testing Models¶

We used three ways to evaluate models as follows:

  • mse is a metric used as an objective for training.
  • rmse is a root of mse which has same scale as predictive variable (rating).
  • accuracy is accuracy of correct discrete rating prediction. Since all reviews have discrete ratings of 1.0, 2.0, 3.0, 4.0 and 5.0, given predictions, we asigned discrete prediction by rounding to nearest integer and calculated accuracy comparing true ratings.
In [112]:
def calculate_mse(y_true, y_pred):
    return torch.mean((y_true - y_pred) ** 2)

def calculate_rmse(y_true, y_pred):
    return torch.sqrt(torch.mean((y_true - y_pred) ** 2))

def discrete_rating(y_pred):
    y_pred = torch.clamp(y_pred, min=0, max=5)
    y_pred = torch.round(y_pred)

    return y_pred

For testing, we saved results in csv table so that we can compare models easily, which is saved to ./test_results.csv.

In [113]:
def update_test_results(result):
    new_result = pd.Series(result).to_frame().T

    if os.path.exists("./test_results.csv"):
        results = pd.read_csv("./test_results.csv")

        duplicate_index = results["name"] == result["name"]
        if sum(duplicate_index) == 0:
            results = pd.concat([results, new_result]).reset_index(drop=True)
        else:
            results = results.values
            results[duplicate_index] = new_result.values
            results = pd.DataFrame(results, columns=new_result.columns)
    else:
        results = new_result

    print(results)
    results.to_csv("./test_results.csv", index=False)
In [114]:
def evaluate_model(name, test_dataloader, model, device):
    with torch.no_grad():
        total = 0
        mse, rmse = 0, 0
        n_corrects = 0

        model.to(device)
        model.eval()

        feat_names = model.feat_names

        for feats in test_dataloader:
            ratings = feats[-1].to(device)
            feats = {name: f.to(device) for name, f in zip(feat_names, feats[:-1])}

            pred_ratings = model(feats)

            batch_size = feats["alpha"].size(0)
            mse += calculate_mse(ratings, pred_ratings).item() * batch_size
            rmse += calculate_rmse(ratings, pred_ratings).item() * batch_size

            pred_discrete = discrete_rating(pred_ratings)

            n_corrects += torch.sum(pred_discrete == ratings).item()
            total += batch_size

        test_mse = mse / total
        test_rmse = rmse / total
        test_accuracy = n_corrects / total

        if "final" in name:
            name = "final"
        elif "base_latent" in name:
            name = "base_latent"
        elif "base" in name:
            name = "base"

        return {"name": name, "mse": test_mse, "rmse": test_rmse, "accuracy": test_accuracy}

First we evaluate two baseline models:

  • MostCommon just returns most common discrete rating 5.
  • Naive just returns a global average.
In [115]:
class MostCommon(nn.Module):
    def __init__(self):
        super().__init__()

        reviews = pd.read_csv("./datasets/splits/test.csv")

        ratings, counts = np.unique(reviews["rating"], return_counts=True)
        self.most_common = torch.tensor(ratings[np.argmax(counts)])

        self.feat_names = ["alpha"]

    def forward(self, x):
        return self.most_common.repeat(x["alpha"].size(0))

class Naive(nn.Module):
    def __init__(self):
        super().__init__()

        reviews = pd.read_csv("./datasets/splits/test.csv")
        self.average = torch.tensor(np.mean(reviews["rating"]))

        self.feat_names = ["alpha"]

    def forward(self, x):
        return self.average.repeat(x["alpha"].size(0))

The following a function to run models on test dataset and save results into test_results.csv.

In [116]:
def test_model(param_dict, name=None):
    if name == "most_common":
        model = MostCommon()
        feat_names = ["alpha"]
    elif name == "naive":
        model = Naive()
        feat_names = ["alpha"]
    else:
        feat = param_dict["feat"]
        feat_names = param_dict["feat_names"]
        lamb_dict = param_dict["lamb_dict"]

        test = param_dict["test"]

        lamb_str = "_".join([f"{name}-{value}" for name, value in lamb_dict.items()])
        name = f"{feat}_{lamb_str}"

        if not test:
            return

        model_path = f"./models/{name}.pt"

        if not os.path.exists(model_path):
            return

        print(f"Loading model from {model_path}")
        model = torch.load(model_path, weights_only=False)

    feat_dicts, _ = preprocess_data_latent(feat_names)
    test_dataset = CafeDatasetLatent("test", feat_names, feat_dicts)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

    result = evaluate_model(name, test_dataloader, model, device)

    update_test_results(result)

Run MostCommon and Naive first.

In [117]:
name = "most_common"
test_model(None, name)
          name       mse      rmse  accuracy
0  base_latent  0.681222  0.825106  0.559864
1        final  0.657577  0.810537  0.574121
2  most_common  1.594156  1.262308  0.523571
3        naive  1.006339  1.003055   0.28008
4         base  0.687114  0.828693  0.556383
In [118]:
test_name = "naive"
test_model(None, test_name)
          name       mse      rmse  accuracy
0  base_latent  0.681222  0.825106  0.559864
1        final  0.657577  0.810537  0.574121
2  most_common  1.594156  1.262308  0.523571
3        naive  1.006339  1.003055   0.28008
4         base  0.687114  0.828693  0.556383
In [119]:
for baseline_param_dict in baseline_params:
    test_model(baseline_param_dict)
Loading model from ./models/base_alpha-0_user-0.1_cafe-1.pt
          name       mse      rmse  accuracy
0  base_latent  0.681222  0.825106  0.559864
1        final  0.657577  0.810537  0.574121
2  most_common  1.594156  1.262308  0.523571
3        naive  1.006339  1.003055   0.28008
4         base  0.687114  0.828693  0.556383
Loading model from ./models/base_latent_alpha-0_user-0.1_cafe-1.pt
          name       mse      rmse  accuracy
0  base_latent  0.681222  0.825106  0.559864
1        final  0.657577  0.810537  0.574121
2  most_common  1.594156  1.262308  0.523571
3        naive  1.006339  1.003055   0.28008
4         base  0.687114  0.828693  0.556383
In [120]:
test_model(final_param_dict)
Loading model from ./models/final_alpha-0_user-0.1_cafe-1_chains-0.1_price-0.1_open_hours-0.1_location-1_weekday-1_hour-1_period-0.1.pt
          name       mse      rmse  accuracy
0  base_latent  0.681222  0.825106  0.559864
1        final  0.657577  0.810537  0.574121
2  most_common  1.594156  1.262308  0.523571
3        naive  1.006339  1.003055   0.28008
4         base  0.687114  0.828693  0.556383

Final Comparision¶

In [121]:
def plot_test_comparisons():
    if not os.path.exists("./metrics.json"):
        return None

    df = pd.read_csv("test_results.csv")
    eval_list = list(df.columns)[1:]

    _, axes = plt.subplots(nrows=1, ncols=len(eval_list), figsize=(30, 5))
    sns.set_theme(style="whitegrid", palette="viridis")

    for i in range(len(eval_list)):
        sns.barplot(df, x="name", y=eval_list[i], ax=axes[i])
        axes[i].set_title(eval_list[i])

    plt.suptitle("Comparison of test performances of final model and baselines")
    plt.show()
In [122]:
plot_test_comparisons()
No description has been provided for this image

Recommender¶

Here, we build three types of recommender models to recommend cafe given a user id.

In [123]:
def get_names(param_dict):
    feat = param_dict["feat"]
    feat_names = param_dict["feat_names"]
    lamb_dict = param_dict["lamb_dict"]

    lamb_str = "_".join([f"{name}-{value}" for name, value in lamb_dict.items()])
    model_name = f"{feat}_{lamb_str}"

    return model_name, feat_names
In [124]:
class LatentBasedCafeRecommender():
    def __init__(self, param_dict, num_recommends, device):
        self.name, self.feat_names = get_names(param_dict)
        model = torch.load(f"./models/{self.name}.pt", weights_only=False).to(device)

        self.user_latents = model.latents["user"]
        self.cafe_latents = model.latents["cafe"]

        self.feat_dicts, _ = preprocess_data_latent(self.feat_names)

        self.num_recommends = num_recommends

        self.index2cafe = {index: cafe for (cafe, index) in self.feat_dicts["cafe"].items()}

    def get_user_gmap_ids(self, user_id):
        reviews = pd.read_csv(f"./datasets/processed/reviews.csv")

        user_reviews = reviews[reviews["user_id"] == user_id]
        user_gmap_ids = user_reviews["gmap_id"].values

        return user_gmap_ids

class CosineBasedCafeRecommender(LatentBasedCafeRecommender):
    def recommend(self, user_id):
        user_index = self.feat_dicts["user"][user_id]
        user_latent = self.user_latents[user_index]

        dot = torch.einsum("bd,d->b", self.cafe_latents, user_latent)
        user_norm = torch.norm(user_latent, p=2, dim=0)
        cafe_norm = torch.norm(self.cafe_latents, p=2, dim=1)

        cosines = dot / (user_norm * cafe_norm + 1e-5)
        indices = torch.flip(torch.argsort(cosines), dims=[0])
        user_gmap_ids = self.get_user_gmap_ids(user_id)

        i = 0
        recommendeds = []
        metrics = []
        while True:
            index = indices[i].item()
            recommended_cafe = self.index2cafe[index]
            if recommended_cafe not in user_gmap_ids:
                recommendeds.append(recommended_cafe)
                metrics.append(cosines[index].item())

            if len(recommendeds) == self.num_recommends:
                break

            i += 1

        return recommendeds, metrics

class NormBasedCafeRecommender(LatentBasedCafeRecommender):
    def recommend(self, user_id):
        user_index = self.feat_dicts["user"][user_id]
        user_latent = self.user_latents[user_index]

        norms = torch.norm(self.cafe_latents - user_latent, p=2, dim=1)
        indices = torch.argsort(norms)
        user_gmap_ids = self.get_user_gmap_ids(user_id)

        i = 0
        recommendeds = []
        metrics = []
        while True:
            index = indices[i].item()
            recommended_cafe = self.index2cafe[index]
            if recommended_cafe not in user_gmap_ids:
                recommendeds.append(recommended_cafe)
                metrics.append(norms[index].item())

            if len(recommendeds) == self.num_recommends:
                break

            i += 1

        return recommendeds, metrics

Next, we recommend cafes which has the highest predicted rating for a user.

In [125]:
class CafeDatasetRecommender(CafeDatasetLatent):
    def __init__(self, user_id, feat_names, feat_dicts):
        reviews = pd.read_csv(f"./datasets/processed/reviews.csv")
        cafes = pd.read_csv(f"./datasets/processed/cafes.csv")

        user_reviews = reviews[reviews["user_id"] == user_id]
        user_reviews = user_reviews.sort_values(by=["time"])
        user_gmap_ids = user_reviews["gmap_id"].values
        self.gmap_ids = cafes[np.logical_not(cafes["gmap_id"].isin(user_gmap_ids).values)]["gmap_id"].values

        self.feat_names = feat_names
        self.feat_dicts = feat_dicts

        self.user_id = user_id

    def __len__(self):
        return self.gmap_ids.shape[0]

    def __getitem__(self, index):
        gmap_id = self.gmap_ids[index]
        feats = []
        for name in self.feat_names:
            if name == "alpha":
                feat = torch.ones(1)
                feats.append(feat)

            elif name == "user":
                feat_dict = self.feat_dicts[name]
                feat = torch.zeros(len(feat_dict.keys()))
                feat[feat_dict[self.user_id]] = 1.
                feats.append(feat)

            elif name == "cafe":
                feat_dict = self.feat_dicts[name]
                feat = torch.zeros(len(feat_dict.keys()))
                feat[feat_dict[gmap_id]] = 1.
                feats.append(feat)

            elif name == "weekday":
                feat = torch.tensor(unix_weekday_to_onehot(1764722615000))
                feats.append(feat)

            elif name == "hour":
                feat = torch.tensor(unix_hour_to_onehot(1764722615000))
                feats.append(feat)

            elif name == "price":
                feat_dict = self.feat_dicts[name]
                feat = torch.tensor(price_to_onehot(feat_dict[gmap_id]))
                feats.append(feat)

            elif name == "open_hours":
                feat_dict = self.feat_dicts[name]
                feat = torch.tensor(hours_to_onehot(feat_dict[gmap_id]))
                feats.append(feat)

            elif name == "location":
                feat_dict = self.feat_dicts[name]
                feat = torch.tensor(location_to_onehot(feat_dict[gmap_id]))
                feats.append(feat)

            elif name == "chains":
                feat_dict = self.feat_dicts[name]
                feat = torch.zeros(3)
                feat[feat_dict[gmap_id]] = 1.
                feats.append(feat)

            elif name == "period":
                feat = torch.tensor([0, 0, 1.])
                feats.append(feat)

            elif name == "prev":
                cafe_feat_dict = self.feat_dicts['cafe']    # All cafes
                feat_dict = self.feat_dicts[name]           # List of user -> list of all cafes user rated

                feat = torch.zeros(len(cafe_feat_dict.keys()))
                feat[cafe_feat_dict[feat_dict[self.user_id][-1]]] = 1.
                feats.append(feat)

            else:
                raise NotImplementedError

        return feats

class RankBasedCafeRecommender():
    def __init__(self, param_dict, num_recommends, device):
        self.name, self.feat_names = get_names(param_dict)
        self.model = torch.load(f"./models/{self.name}.pt", weights_only=False).to(device)
        self.device = device

        self.feat_dicts, _ = preprocess_data_latent(self.feat_names)

        self.num_recommends = num_recommends

    def recommend(self, user_id):
        dataset = CafeDatasetRecommender(user_id, self.feat_names, self.feat_dicts)
        dataloader = DataLoader(dataset, batch_size=2048, shuffle=False)

        predictions = np.array(self.run_predictions(dataloader))
        gmap_ids = np.array(dataset.gmap_ids)

        indices = np.argsort(predictions)[::-1][:self.num_recommends]

        return gmap_ids[indices], predictions[indices]

    def run_predictions(self, dataloader):
        predictions = []
        with torch.no_grad():
            self.model.to(self.device)
            self.model.eval()

            for feats in dataloader:
                feats = {name: f.to(self.device) for name, f in zip(self.feat_names, feats)}
                pred_ratings = self.model(feats).tolist()
                predictions += pred_ratings

        return predictions
In [126]:
def recommend_random_sampled_users(recommenders, num_samples):
    users = pd.read_csv("./datasets/processed/users.csv")
    reviews = pd.read_csv("./datasets/processed/reviews.csv")
    cafes = pd.read_csv("./datasets/processed/cafes.csv")

    reviews = reviews.sort_values(by=["user_id", "time"])

    sampled_users = users.sample(num_samples, random_state=42)

    for (user_id, num_reviews) in sampled_users.values:
        user_reviews = reviews[reviews["user_id"] == user_id]

        for i, user_review in enumerate(user_reviews.values):
            if i == 0:
                user_name = user_review[2]
                print(f"{user_name:20s}: ({num_reviews} reviews)")
            gmap_id = user_review[0]
            review_time = user_review[3]
            rating = user_review[4]


            cafe = cafes[cafes["gmap_id"] == gmap_id].values[0]
            cafe_name = cafe[1]

            date = datetime.fromtimestamp(review_time / 1000, tz=timezone.utc)
            date_str = date.strftime("%Y/%m/%d")
            print(f"    {cafe_name:50s}: {rating} rated at {date_str}")

        print()

        for recommender_name, recommender in recommenders.items():
            recommended_cafes, pred_ratings = recommender.recommend(user_id)
            print(f"{recommender_name} based recommendation for {user_name}")
            for recommended_cafe, pred_rating in zip(recommended_cafes, pred_ratings):
                cafe = cafes[cafes["gmap_id"] == recommended_cafe].values[0]
                cafe_name = cafe[1]
                print(f"    {cafe_name:50s}: {pred_rating}")

            print()

        print("=" * 80)
        print()

def save_batch_recommendations(recommenders, num_samples):
    users = pd.read_csv("./datasets/processed/users.csv")
    reviews = pd.read_csv("./datasets/processed/reviews.csv")

    reviews = reviews.sort_values(by=["user_id", "time"])

    sampled_users = users.sample(num_samples, random_state=42)

    recommendations = {}
    for recommender_name, recommender in recommenders.items():
        all_cafes = []
        for (user_id, _) in tqdm.tqdm(sampled_users.values):
            recommended_cafes, _ = recommender.recommend(user_id)
            all_cafes += list(recommended_cafes)

        recommendations[recommender_name] = all_cafes

    with open("./batch_recommendations.json", "w") as f:
        json.dump(recommendations, f)

def plot_consumptions():
    reviews = pd.read_csv("./datasets/processed/reviews.csv")

    review_counts = reviews.groupby("gmap_id")["rating"].agg(["count", "mean"])
    gmap_ids = review_counts.index.values
    counts = review_counts["count"].values
    avg_ratings = review_counts["mean"].values

    indices = np.argsort(counts)[::-1]

    with open("./batch_recommendations.json", "r") as f:
        recommendations = {key: np.array(values) for key, values in json.load(f).items()}

    freqs = []
    for index in indices:
        gmap_id = gmap_ids[index]
        count = counts[index]
        avg_rating = avg_ratings[index]

        items = []
        for _, values in recommendations.items():
            items.append(int(np.sum(np.array(values) == gmap_id)))

        freqs.append([gmap_id, avg_rating * 100, int(count)] + items)

    df = pd.DataFrame(freqs, columns=["gmap_id", "avg_rating", "counts"] + list(recommendations.keys()))
    sub_df = df.iloc[:100]

    sns.set_theme(style="whitegrid", palette="viridis")
    plt.figure(figsize=(20, 10))
    sns.barplot(sub_df, x="gmap_id", y="counts", width=1.0, alpha=0.4)
    sns.lineplot(sub_df, x="gmap_id", y="avg_rating", label="Average Rating")

    for key in recommendations.keys():
        sns.lineplot(sub_df, x="gmap_id", y=key, label=key, linewidth=5)

    plt.title("Consumption and recommended frequencies of cafes")
    plt.xlabel("Sorted list of cafes")
    plt.xticks([])
    plt.ylabel("Frequency")
    plt.legend()
    plt.tight_layout()
    plt.show()

    sns.set_theme(style="whitegrid", palette="viridis")
    plt.figure(figsize=(20, 10))
    for key in ["Cosine", "Norm"]:
        sns.histplot(df, x=key, label=key, alpha=0.4, bins=np.arange(11))

    plt.title("Histogram of recommendations for each method.")
    plt.xlabel("The number of recommendations")
    plt.ylabel("Frequency")
    plt.legend()
    plt.tight_layout()
    plt.show()
In [127]:
baseline_param_dict = baseline_params[1]

num_recommends = 10

rank_recommender = RankBasedCafeRecommender(baseline_param_dict, num_recommends, device)
cosine_recommender = CosineBasedCafeRecommender(baseline_param_dict , num_recommends, device)
norm_recommender = NormBasedCafeRecommender(baseline_param_dict , num_recommends, device)
recommenders = {"Rank": rank_recommender, "Cosine": cosine_recommender, "Norm": norm_recommender}

recommend_random_sampled_users(recommenders, 10)

num_samples = 1000

if not os.path.exists("./batch_recommendations.json"):
    save_batch_recommendations(recommenders, num_samples)

plot_consumptions()
J.McGarv            : (27 reviews)
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2011/07/10
    Starbucks                                         : 4.0 rated at 2015/12/26
    Starbucks                                         : 4.0 rated at 2015/12/26
    grounds bakery & cafe                             : 4.0 rated at 2015/12/26
    Starbucks                                         : 4.0 rated at 2015/12/27
    Coffee Cup Cafe                                   : 4.0 rated at 2016/02/28
    McDonald's                                        : 3.0 rated at 2016/09/04
    Starbucks                                         : 4.0 rated at 2016/09/04
    Starbucks                                         : 4.0 rated at 2016/09/04
    Heritage Family Pantry                            : 3.0 rated at 2016/09/04
    McDonald's                                        : 3.0 rated at 2016/09/04
    Starbucks                                         : 4.0 rated at 2016/09/04
    Driftwood Cafe                                    : 4.0 rated at 2016/09/04
    The Coffee Bean & Tea Leaf                        : 3.0 rated at 2016/09/04
    Panera Bread                                      : 3.0 rated at 2016/09/04
    Barnes & Noble                                    : 3.0 rated at 2016/09/04
    Hot Java                                          : 3.0 rated at 2016/09/04
    Starbucks                                         : 4.0 rated at 2018/01/27
    Starbucks                                         : 4.0 rated at 2018/01/27
    McDonald's                                        : 2.0 rated at 2018/11/01
    Starbucks                                         : 4.0 rated at 2018/12/27
    Dunkin'                                           : 5.0 rated at 2019/05/17
    Starbucks                                         : 4.0 rated at 2019/05/26
    McDonald's                                        : 3.0 rated at 2019/06/08
    Starbucks                                         : 4.0 rated at 2019/09/14
    Sizzler - Los Alamitos                            : 4.0 rated at 2019/12/25
    Panera Bread                                      : 3.0 rated at 2019/12/25

Rank based recommendation for J.McGarv
    Dandelion Chocolate                               : 4.111937522888184
    Réveille Coffee Co.                               : 4.089178085327148
    Porto's Bakery and Cafe                           : 4.080690383911133
    Bird Rock Coffee Roasters                         : 4.0632829666137695
    Kuppa Joy Coffee House                            : 4.063227653503418
    Hook Fish Co                                      : 4.063088893890381
    Verve Coffee Roasters                             : 4.060028076171875
    Philz Coffee                                      : 4.058209419250488
    The Fig Tree Coffee, Art, & Music Lounge          : 4.050896167755127
    Kéan Coffee Artisan Roasters                      : 4.045582294464111

Cosine based recommendation for J.McGarv
    Tin Roof Bakery and Cafe                          : 0.6770483255386353
    Starbucks                                         : 0.6746904253959656
    McDonald's                                        : 0.6740233302116394
    Starbucks                                         : 0.6229677200317383
    Hot Wings Cafe                                    : 0.6194844841957092
    McDonald's                                        : 0.600993812084198
    street level cafe                                 : 0.5976296663284302
    Starbucks                                         : 0.5962474942207336
    Rocky Cola Café                                   : 0.5829941034317017
    Wildflower Café and Bakery                        : 0.5821545720100403

Norm based recommendation for J.McGarv
    Starbucks                                         : 0.3031110465526581
    McDonald's                                        : 0.3048095107078552
    Hot Wings Cafe                                    : 0.31191280484199524
    Starbucks                                         : 0.3165348470211029
    McDonald's                                        : 0.31780409812927246
    Rocky Cola Café                                   : 0.31805914640426636
    Starbucks                                         : 0.32004138827323914
    The Pie Hole                                      : 0.3210485875606537
    Starbucks                                         : 0.3212147057056427
    Antigua Bread                                     : 0.3240455389022827

================================================================================

Miguel Hernandez    : (25 reviews)
    Starbucks                                         : 1.0 rated at 2019/02/16
    McDonald's                                        : 4.0 rated at 2019/03/17
    McDonald's                                        : 4.0 rated at 2019/03/17
    Starbucks                                         : 4.0 rated at 2019/03/17
    Starbucks                                         : 4.0 rated at 2019/03/22
    Fantastic Cafe                                    : 4.0 rated at 2019/05/04
    Panera Bread                                      : 4.0 rated at 2019/06/04
    Porto's Bakery and Cafe                           : 4.0 rated at 2019/06/04
    85°C Bakery Cafe - South Gate                     : 5.0 rated at 2019/07/19
    Sizzler                                           : 4.0 rated at 2019/08/11
    Tierra Mia Coffee                                 : 5.0 rated at 2019/09/12
    Tamarindo Latin Kitchen & Bar                     : 4.0 rated at 2019/10/12
    Starbucks                                         : 5.0 rated at 2019/12/12
    Starbucks                                         : 5.0 rated at 2019/12/12
    Poached Neighborhood Kitchen                      : 4.0 rated at 2020/02/08
    Sequoia Coffee Co.                                : 5.0 rated at 2020/08/07
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2020/10/18
    Tierra Mia Coffee                                 : 5.0 rated at 2020/12/19
    Trejo’s Coffee & Donuts                           : 5.0 rated at 2021/01/21
    Tierra Mia Coffee Drive Thru                      : 5.0 rated at 2021/02/07
    Noah's NY Bagels                                  : 5.0 rated at 2021/02/10
    Tierra Mia Coffee                                 : 5.0 rated at 2021/02/19
    Starbucks                                         : 5.0 rated at 2021/02/24
    Starbucks                                         : 5.0 rated at 2021/03/23
    Bionicos Plus                                     : 5.0 rated at 2021/05/18

Rank based recommendation for Miguel Hernandez
    Dandelion Chocolate                               : 4.721451759338379
    Réveille Coffee Co.                               : 4.699912071228027
    Philz Coffee                                      : 4.685025691986084
    Kuppa Joy Coffee House                            : 4.6698689460754395
    Verve Coffee Roasters                             : 4.668992042541504
    Bird Rock Coffee Roasters                         : 4.661994934082031
    Hook Fish Co                                      : 4.661142349243164
    The Fig Tree Coffee, Art, & Music Lounge          : 4.660951614379883
    The Conservatory for Coffee, Tea & Cocoa          : 4.65963888168335
    Sidecar Doughnuts & Coffee                        : 4.65231990814209

Cosine based recommendation for Miguel Hernandez
    McDonald's                                        : 0.6919595003128052
    McDonald's                                        : 0.6873427629470825
    McDonald's                                        : 0.6735078692436218
    Starbucks                                         : 0.672364354133606
    McDonald's                                        : 0.6641788482666016
    Panera Bread                                      : 0.6609896421432495
    McDonald's                                        : 0.6584145426750183
    McDonald's                                        : 0.6555770635604858
    McDonald's                                        : 0.6404334306716919
    McDonald's                                        : 0.6370680928230286

Norm based recommendation for Miguel Hernandez
    Grand View Market                                 : 0.2595973312854767
    La Monarca Bakery & Cafe                          : 0.26598837971687317
    Starbucks                                         : 0.2693083882331848
    McDonald's                                        : 0.2693861424922943
    Caffe Luxxe                                       : 0.26955342292785645
    Neenach Cafe & Market                             : 0.27130362391471863
    Starbucks                                         : 0.2726983428001404
    Starbucks                                         : 0.27272847294807434
    McDonald's                                        : 0.27305155992507935
    McDonald's                                        : 0.27422618865966797

================================================================================

Marci eg9           : (20 reviews)
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2018/08/25
    McDonald's                                        : 3.0 rated at 2018/08/30
    The Stonehaus                                     : 4.0 rated at 2018/09/04
    McDonald's                                        : 3.0 rated at 2018/09/05
    McDonald's                                        : 5.0 rated at 2018/10/07
    McDonald's                                        : 2.0 rated at 2018/10/07
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2018/10/07
    Starbucks                                         : 5.0 rated at 2018/10/07
    McDonald's                                        : 3.0 rated at 2018/10/09
    McDonald's                                        : 2.0 rated at 2019/06/16
    McDonald's                                        : 3.0 rated at 2019/09/06
    McDonald's                                        : 3.0 rated at 2019/09/06
    McDonald's                                        : 3.0 rated at 2019/09/06
    McDonald's                                        : 3.0 rated at 2020/06/29
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2020/06/29
    Cafe Demitasse                                    : 4.0 rated at 2020/07/14
    McDonald's                                        : 3.0 rated at 2020/07/18
    Starbucks                                         : 4.0 rated at 2020/08/07
    Crave Cafe                                        : 4.0 rated at 2020/08/18
    Starbucks                                         : 3.0 rated at 2021/02/23

Rank based recommendation for Marci eg9
    Porto's Bakery and Cafe                           : 4.255527496337891
    Dandelion Chocolate                               : 4.196784019470215
    Philz Coffee                                      : 4.183241844177246
    Porto's Bakery and Cafe                           : 4.1503682136535645
    Sightglass Coffee                                 : 4.147553443908691
    The Original Pancake House - Whittier             : 4.147345542907715
    Stumptown Coffee Roasters                         : 4.142796516418457
    Tropicana Bakery & Cuban Cafe                     : 4.1407694816589355
    Barnes & Noble                                    : 4.128203868865967
    Dutch Bros Coffee                                 : 4.122499465942383

Cosine based recommendation for Marci eg9
    Starbucks                                         : 0.815220832824707
    Starbucks                                         : 0.7942237257957458
    Rev’d Up Coffee & Classics                        : 0.7926855087280273
    7 Leaves Cafe Tustin                              : 0.7807711958885193
    Active Culture                                    : 0.7782267928123474
    Starbucks                                         : 0.7753311991691589
    Brew Coffee Spot                                  : 0.7698622345924377
    Better Buzz Coffee Pacific Beach West             : 0.7558760046958923
    Arizmendi Bakery & Café                           : 0.7516003251075745
    Starbucks                                         : 0.7477274537086487

Norm based recommendation for Marci eg9
    Arizmendi Bakery & Café                           : 0.32547229528427124
    Starbucks                                         : 0.3262135088443756
    Starbucks                                         : 0.3361106812953949
    Starbucks                                         : 0.3455308675765991
    Big Thyme Sandwich Company                        : 0.3460867702960968
    Starbucks                                         : 0.3480357527732849
    Brew Coffee Spot                                  : 0.34874314069747925
    Better Buzz Coffee Pacific Beach West             : 0.3497498035430908
    Active Culture                                    : 0.35141339898109436
    Pozi's Fresh Grill                                : 0.3543124496936798

================================================================================

Boris Goncharov     : (21 reviews)
    Bicycle Coffee LA Cafe + Roastery                 : 4.0 rated at 2018/10/21
    The Coffee Bean & Tea Leaf                        : 4.0 rated at 2018/10/21
    Blue Bottle Coffee                                : 5.0 rated at 2018/12/23
    Alfred Coffee Melrose Place                       : 5.0 rated at 2019/01/27
    Starbucks                                         : 5.0 rated at 2019/02/06
    Starbucks                                         : 5.0 rated at 2019/04/02
    Jones Coffee Roasters                             : 5.0 rated at 2019/05/04
    Tilt Coffee Bar                                   : 4.0 rated at 2019/05/13
    Black Elephant Coffee                             : 4.0 rated at 2019/05/21
    Coffee Commissary                                 : 5.0 rated at 2019/06/07
    Cafe de Leche                                     : 4.0 rated at 2019/06/10
    Cyber City Esports Center | Little Tokyo          : 5.0 rated at 2019/07/19
    Starbucks                                         : 4.0 rated at 2019/09/26
    Espresso Profeta                                  : 5.0 rated at 2019/09/28
    Superba Snacks + Coffee                           : 5.0 rated at 2019/11/29
    Black Dog Coffee                                  : 3.0 rated at 2020/02/03
    Cafe Demitasse                                    : 5.0 rated at 2020/02/29
    Starbucks                                         : 5.0 rated at 2021/02/08
    The Highland Cafe                                 : 3.0 rated at 2021/02/08
    Verve Coffee Roasters                             : 5.0 rated at 2021/02/08
    Crystal Lake Cafe                                 : 5.0 rated at 2021/02/08

Rank based recommendation for Boris Goncharov
    Porto's Bakery and Cafe                           : 4.792549133300781
    Dandelion Chocolate                               : 4.748739719390869
    Réveille Coffee Co.                               : 4.701197624206543
    Philz Coffee                                      : 4.698929786682129
    Sidecar Doughnuts & Coffee                        : 4.693772315979004
    The Conservatory for Coffee, Tea & Cocoa          : 4.687025547027588
    Sightglass Coffee                                 : 4.680482387542725
    Chez Panisse                                      : 4.665746688842773
    Stumptown Coffee Roasters                         : 4.658592700958252
    Bird Rock Coffee Roasters                         : 4.6552276611328125

Cosine based recommendation for Boris Goncharov
    Starbucks                                         : 0.6997483968734741
    Cafe Lakeview                                     : 0.6816622018814087
    The Coffee Bean & Tea Leaf                        : 0.68093341588974
    Arnoldi's Cafe                                    : 0.6459140777587891
    Carlsbad Coffee House                             : 0.6433436870574951
    Bread & Porridge                                  : 0.6354042887687683
    Lefty O'Doul's                                    : 0.631080150604248
    ROMAN AROMA Italian Gran Caffe                    : 0.6300442814826965
    Paris Baguette                                    : 0.6240677833557129
    Rick's Restaurant & Bakery                        : 0.6120269894599915

Norm based recommendation for Boris Goncharov
    Starbucks                                         : 0.2349633127450943
    The Coffee Bean & Tea Leaf                        : 0.23510074615478516
    Bread & Porridge                                  : 0.24451382458209991
    ROMAN AROMA Italian Gran Caffe                    : 0.24571987986564636
    Rick's Restaurant & Bakery                        : 0.2507384419441223
    Winchell's Donut House                            : 0.25248539447784424
    Starbucks                                         : 0.2526015341281891
    Carlsbad Coffee House                             : 0.25455519556999207
    Au79 Tea Express                                  : 0.25527361035346985
    FRED'S Place                                      : 0.255776971578598

================================================================================

Sean O’Connor       : (20 reviews)
    The Creamery                                      : 2.0 rated at 2012/08/06
    The Creamery                                      : 2.0 rated at 2012/08/06
    Cafe Prague                                       : 3.0 rated at 2012/10/03
    The Plant Café Organic                            : 3.0 rated at 2012/12/22
    Velo Rouge Cafe                                   : 4.0 rated at 2013/03/04
    Tierra Mia Coffee                                 : 4.0 rated at 2013/10/16
    Gaslamp Cafe                                      : 4.0 rated at 2014/07/08
    The Sacred Grounds Cafe                           : 1.0 rated at 2015/07/15
    Antigua Coffee Shop                               : 4.0 rated at 2016/03/23
    Flywheel Coffee Roasters                          : 4.0 rated at 2016/04/10
    Flywheel Coffee Roasters                          : 4.0 rated at 2016/04/10
    Matching Half                                     : 4.0 rated at 2016/07/17
    Java Beach At The Zoo                             : 4.0 rated at 2016/11/24
    The Mill                                          : 5.0 rated at 2016/11/24
    South Beach Cafe                                  : 3.0 rated at 2016/11/24
    Java Beach Cafe                                   : 4.0 rated at 2016/11/24
    Stable Cafe                                       : 5.0 rated at 2018/04/02
    Hollow                                            : 4.0 rated at 2019/11/15
    Snowbird Coffee                                   : 5.0 rated at 2020/03/07
    Manitas Cafe                                      : 4.0 rated at 2020/05/27

Rank based recommendation for Sean O’Connor
    Dandelion Chocolate                               : 4.034237861633301
    Réveille Coffee Co.                               : 3.985837936401367
    Sidecar Doughnuts & Coffee                        : 3.972456455230713
    Philz Coffee                                      : 3.967827558517456
    Sightglass Coffee                                 : 3.96779203414917
    Kuppa Joy Coffee House                            : 3.958588123321533
    Porto's Bakery and Cafe                           : 3.948725700378418
    The Conservatory for Coffee, Tea & Cocoa          : 3.9479877948760986
    Porto's Bakery and Cafe                           : 3.9446637630462646
    Chez Panisse                                      : 3.9425134658813477

Cosine based recommendation for Sean O’Connor
    Trouble Coffee                                    : 0.7098062038421631
    Starbucks                                         : 0.6409028768539429
    The Farmhand                                      : 0.603118360042572
    Kuppa Joy Coffee House                            : 0.6017208695411682
    Kung Fu Tea                                       : 0.5951878428459167
    Starbucks                                         : 0.5912845134735107
    Flower Farm Cafe                                  : 0.5888568162918091
    Urth Caffe Pasadena                               : 0.5753278136253357
    Starbucks                                         : 0.5632597804069519
    Duboce Park Cafe                                  : 0.5627772808074951

Norm based recommendation for Sean O’Connor
    Trouble Coffee                                    : 0.22670993208885193
    Starbucks                                         : 0.23266303539276123
    Kuppa Joy Coffee House                            : 0.2339998483657837
    Starbucks                                         : 0.2384364753961563
    Duboce Park Cafe                                  : 0.24150070548057556
    McDonald's                                        : 0.2456066906452179
    Starbucks                                         : 0.24613437056541443
    McDonald's                                        : 0.24625752866268158
    Kéan Coffee Artisan Roasters                      : 0.2477489709854126
    Thomas                                            : 0.2498263567686081

================================================================================

Daniel Morgan       : (25 reviews)
    McDonald's                                        : 5.0 rated at 2017/07/13
    Starbucks Reserve                                 : 5.0 rated at 2018/02/21
    McDonald's                                        : 5.0 rated at 2018/02/27
    Starbucks                                         : 5.0 rated at 2018/02/27
    Heidi's Pies Restaurant                           : 5.0 rated at 2018/03/10
    Philz Coffee                                      : 5.0 rated at 2018/03/11
    McDonald's                                        : 5.0 rated at 2018/04/25
    Starbucks                                         : 5.0 rated at 2018/06/27
    Starbucks                                         : 5.0 rated at 2018/06/27
    McDonald's                                        : 5.0 rated at 2018/06/27
    Starbucks                                         : 5.0 rated at 2018/06/27
    Starbucks                                         : 5.0 rated at 2018/06/27
    McDonald's                                        : 5.0 rated at 2018/06/27
    Sizzler                                           : 5.0 rated at 2018/06/27
    McDonald's                                        : 5.0 rated at 2018/06/27
    Barnes & Noble                                    : 5.0 rated at 2019/01/21
    La Stazione Coffee & Wine Bar                     : 5.0 rated at 2019/02/08
    Starbucks                                         : 5.0 rated at 2019/02/08
    Starbucks                                         : 5.0 rated at 2019/02/08
    Philz Coffee                                      : 3.0 rated at 2019/12/16
    McDonald's                                        : 5.0 rated at 2019/12/16
    Barnes & Noble                                    : 5.0 rated at 2019/12/16
    Dunkin'                                           : 5.0 rated at 2019/12/16
    Starbucks                                         : 5.0 rated at 2019/12/16
    McDonald's                                        : 5.0 rated at 2020/02/06

Rank based recommendation for Daniel Morgan
    Réveille Coffee Co.                               : 5.263625621795654
    Kuppa Joy Coffee House                            : 5.252823352813721
    Verve Coffee Roasters                             : 5.252277374267578
    Bird Rock Coffee Roasters                         : 5.243653774261475
    Dandelion Chocolate                               : 5.236135959625244
    Blue Bottle Coffee                                : 5.232618808746338
    Hook Fish Co                                      : 5.225208282470703
    The Fig Tree Coffee, Art, & Music Lounge          : 5.224587440490723
    Bouchon Bakery                                    : 5.224287986755371
    Scout Coffee Co.                                  : 5.223339080810547

Cosine based recommendation for Daniel Morgan
    McDonald's                                        : 0.8614490032196045
    Starbucks                                         : 0.8534631133079529
    McDonald's                                        : 0.8496585488319397
    Starbucks                                         : 0.8284366726875305
    From the Hearth Cafe                              : 0.8187785744667053
    McDonald's                                        : 0.8135740160942078
    Coliseum Public Market                            : 0.8121858835220337
    Starbucks                                         : 0.8106656074523926
    McDonald's                                        : 0.810453474521637
    McDonald's                                        : 0.8053823113441467

Norm based recommendation for Daniel Morgan
    McDonald's                                        : 0.2521427273750305
    Coliseum Public Market                            : 0.2890159785747528
    Starbucks                                         : 0.29426711797714233
    McDonald's                                        : 0.30429312586784363
    McDonald's                                        : 0.30948498845100403
    McDonald's                                        : 0.3163629472255707
    McDonald's                                        : 0.3179888129234314
    McDonald's                                        : 0.31855323910713196
    McDonald's                                        : 0.31868550181388855
    Hollywood Cafe                                    : 0.3211582601070404

================================================================================

bruce paulson       : (20 reviews)
    Lou's Coffee Shop                                 : 3.0 rated at 2017/07/06
    Brown Chicken Brown Cow                           : 3.0 rated at 2017/07/06
    McDonald's                                        : 5.0 rated at 2017/08/30
    Starbucks                                         : 4.0 rated at 2017/11/23
    Starbucks                                         : 3.0 rated at 2017/11/29
    McDonald's                                        : 3.0 rated at 2017/12/15
    McDonald's                                        : 3.0 rated at 2018/01/12
    McDonald's                                        : 4.0 rated at 2018/02/13
    Starbucks                                         : 3.0 rated at 2018/02/13
    McDonald's                                        : 3.0 rated at 2018/04/18
    McDonald's                                        : 3.0 rated at 2018/07/06
    Starbucks                                         : 4.0 rated at 2018/07/07
    Paul's Coffee Shop                                : 4.0 rated at 2018/07/29
    McDonald's                                        : 4.0 rated at 2018/07/30
    McDonald's                                        : 4.0 rated at 2018/07/30
    Perko's Café                                      : 3.0 rated at 2018/11/25
    Stanislaus County Fair                            : 5.0 rated at 2019/01/31
    McDonald's                                        : 4.0 rated at 2019/11/04
    McDonald's                                        : 5.0 rated at 2020/12/16
    Starbucks                                         : 4.0 rated at 2020/12/19

Rank based recommendation for bruce paulson
    Réveille Coffee Co.                               : 4.191330432891846
    Dandelion Chocolate                               : 4.189498424530029
    Verve Coffee Roasters                             : 4.185368061065674
    Hook Fish Co                                      : 4.17167854309082
    Kuppa Joy Coffee House                            : 4.154677867889404
    The Fig Tree Coffee, Art, & Music Lounge          : 4.15355920791626
    Bird Rock Coffee Roasters                         : 4.153067588806152
    Porto's Bakery and Cafe                           : 4.143840312957764
    Arizmendi Bakery                                  : 4.138145446777344
    The Pour Choice                                   : 4.135522842407227

Cosine based recommendation for bruce paulson
    McDonald's                                        : 0.8282307386398315
    Quetzal Internet Cafe                             : 0.7364310622215271
    McDonald's                                        : 0.7190155982971191
    El Cerrito Natural Grocery - Prepared Food Annex  : 0.6756705045700073
    Dutch Bros Coffee                                 : 0.6697598099708557
    McDonald's                                        : 0.6692651510238647
    The Red Hut Café                                  : 0.6675969362258911
    Starbucks                                         : 0.6641579270362854
    McDonald's                                        : 0.6611679196357727
    Tremont Café                                      : 0.659969687461853

Norm based recommendation for bruce paulson
    McDonald's                                        : 0.32931220531463623
    Starbucks                                         : 0.33887356519699097
    Dutch Bros Coffee                                 : 0.3451334536075592
    McDonald's                                        : 0.34642377495765686
    McDonald's                                        : 0.34664344787597656
    McDonald's                                        : 0.3479550778865814
    Starbucks                                         : 0.354634553194046
    McDonald's                                        : 0.35617896914482117
    El Cerrito Natural Grocery - Prepared Food Annex  : 0.3572598099708557
    McDonald's                                        : 0.36021050810813904

================================================================================

larry mucho         : (46 reviews)
    Raley's                                           : 3.0 rated at 2017/05/04
    Picasso's Gourmet Deli                            : 4.0 rated at 2017/05/04
    Limelight Bar & Café                              : 3.0 rated at 2017/06/16
    Chocolate Fish Coffee Roasters                    : 4.0 rated at 2017/10/05
    Pachamama Coffee Bar                              : 4.0 rated at 2017/10/19
    Chocolate Fish Coffee                             : 4.0 rated at 2017/11/08
    McDonald's                                        : 2.0 rated at 2017/11/16
    McDonald's                                        : 1.0 rated at 2017/11/27
    Town & Country Cafe                               : 4.0 rated at 2017/12/09
    Noah's NY Bagels                                  : 4.0 rated at 2018/02/26
    Temple Coffee Roasters                            : 3.0 rated at 2018/05/24
    Panera Bread                                      : 2.0 rated at 2018/07/13
    Starbucks                                         : 4.0 rated at 2018/07/31
    Revive Coffee & Wine                              : 4.0 rated at 2018/08/09
    Old Soul @ 40 Acres                               : 4.0 rated at 2018/08/09
    Shine Kava                                        : 4.0 rated at 2018/09/12
    McDonald's                                        : 3.0 rated at 2018/10/24
    Starbucks                                         : 3.0 rated at 2018/12/21
    McDonald's                                        : 2.0 rated at 2018/12/31
    McDonald's                                        : 3.0 rated at 2019/01/03
    Village Baking Company & Cafe                     : 4.0 rated at 2019/01/14
    Pegasus Bakery & Café                             : 5.0 rated at 2019/02/05
    Starbucks                                         : 4.0 rated at 2019/02/19
    Ettore's Bakery & Cafe                            : 4.0 rated at 2019/03/14
    Badfish Coffee & Tea                              : 4.0 rated at 2019/03/25
    Jack's Urban Eats                                 : 4.0 rated at 2019/04/04
    World Coffee House                                : 3.0 rated at 2019/04/30
    Starbucks                                         : 4.0 rated at 2019/06/11
    Fair Oaks Coffee House & Deli                     : 4.0 rated at 2019/07/25
    Sun City Lincoln Hills                            : 4.0 rated at 2019/07/25
    Emily's Good Things To Eat                        : 4.0 rated at 2019/08/29
    Karen's Bakery                                    : 4.0 rated at 2019/09/25
    McDonald's                                        : 3.0 rated at 2019/10/14
    Noah's NY Bagels                                  : 4.0 rated at 2019/11/26
    McDonald's                                        : 4.0 rated at 2019/12/20
    Shift Coffee House                                : 4.0 rated at 2020/01/09
    Old Soul Co.                                      : 4.0 rated at 2020/01/13
    McDonald's                                        : 3.0 rated at 2020/01/24
    Dos Coyotes Border Cafe                           : 4.0 rated at 2020/01/25
    Noah's NY Bagels                                  : 4.0 rated at 2020/08/07
    Raley's                                           : 4.0 rated at 2020/09/02
    Spinners                                          : 5.0 rated at 2020/09/15
    Dunkin'                                           : 4.0 rated at 2020/10/26
    Starbucks                                         : 4.0 rated at 2021/01/21
    Greek Food Imports - An Original Greek Market-Cafe: 5.0 rated at 2021/03/17
    La Bou Bakery & Café                              : 4.0 rated at 2021/05/04

Rank based recommendation for larry mucho
    Porto's Bakery and Cafe                           : 4.104151248931885
    Dandelion Chocolate                               : 4.073251724243164
    Philz Coffee                                      : 4.054135799407959
    Réveille Coffee Co.                               : 4.0345635414123535
    The Conservatory for Coffee, Tea & Cocoa          : 4.028214454650879
    Porto's Bakery and Cafe                           : 4.022084712982178
    Sidecar Doughnuts & Coffee                        : 4.020078182220459
    Sightglass Coffee                                 : 4.016838550567627
    Stumptown Coffee Roasters                         : 3.998685598373413
    Barnes & Noble                                    : 3.998082399368286

Cosine based recommendation for larry mucho
    Sushi Café                                        : 0.7324972152709961
    Starbucks                                         : 0.6834812760353088
    The Conservatory for Coffee, Tea & Cocoa          : 0.6689296960830688
    Starbucks                                         : 0.6519709229469299
    Winchell's Donut House                            : 0.6496578454971313
    Cafe 70 Degrees                                   : 0.6494067311286926
    Bruegger's Bagels                                 : 0.648546040058136
    Barnes & Noble                                    : 0.640762984752655
    Starbucks                                         : 0.6406750679016113
    Panera Bread                                      : 0.63717120885849

Norm based recommendation for larry mucho
    Sushi Café                                        : 0.21486909687519073
    Starbucks                                         : 0.22794415056705475
    The Conservatory for Coffee, Tea & Cocoa          : 0.23046958446502686
    Barnes & Noble                                    : 0.23439055681228638
    Starbucks                                         : 0.23703226447105408
    Bruegger's Bagels                                 : 0.23836304247379303
    Broadway Coffee                                   : 0.23917916417121887
    Cafe 70 Degrees                                   : 0.2403290867805481
    Panera Bread                                      : 0.24070687592029572
    Starbucks                                         : 0.24085326492786407

================================================================================

Tony Cardoza        : (29 reviews)
    Olive Pit                                         : 5.0 rated at 2018/02/23
    Olive Pit                                         : 5.0 rated at 2018/02/23
    McDonald's                                        : 5.0 rated at 2018/03/10
    Starbucks                                         : 5.0 rated at 2018/03/23
    McDonald's                                        : 5.0 rated at 2018/04/05
    Panera Bread                                      : 3.0 rated at 2018/04/09
    Panera Bread                                      : 3.0 rated at 2018/04/10
    Starbucks                                         : 5.0 rated at 2018/04/29
    Panera Bread                                      : 5.0 rated at 2018/04/29
    McDonald's                                        : 5.0 rated at 2018/04/30
    McDonald's                                        : 5.0 rated at 2018/04/30
    Sizzler - Santa Clara                             : 5.0 rated at 2018/05/19
    Half Moon Bay Coffee Co                           : 5.0 rated at 2018/05/20
    Cunha's Country Store                             : 5.0 rated at 2018/05/20
    McDonald's                                        : 5.0 rated at 2018/05/27
    Starbucks                                         : 5.0 rated at 2018/05/29
    Starbucks                                         : 5.0 rated at 2018/05/29
    Starbucks                                         : 5.0 rated at 2018/05/30
    Starbucks                                         : 5.0 rated at 2018/06/08
    Starbucks                                         : 5.0 rated at 2018/07/23
    Starbucks                                         : 5.0 rated at 2018/07/28
    Starbucks                                         : 5.0 rated at 2018/08/02
    Sizzler                                           : 5.0 rated at 2018/08/29
    Barnes & Noble                                    : 5.0 rated at 2018/09/10
    Starbucks                                         : 5.0 rated at 2018/10/06
    Panera Bread                                      : 5.0 rated at 2018/10/31
    Dutch Bros Coffee                                 : 5.0 rated at 2018/11/29
    McDonald's                                        : 1.0 rated at 2019/03/17
    Starbucks                                         : 5.0 rated at 2019/06/21

Rank based recommendation for Tony Cardoza
    Philz Coffee                                      : 5.197443962097168
    Dandelion Chocolate                               : 5.187047958374023
    Réveille Coffee Co.                               : 5.165572643280029
    The Conservatory for Coffee, Tea & Cocoa          : 5.151037693023682
    Porto's Bakery and Cafe                           : 5.131763935089111
    Verve Coffee Roasters                             : 5.129419803619385
    Sidecar Doughnuts & Coffee                        : 5.128408908843994
    Arizmendi Bakery                                  : 5.1215105056762695
    Dutch Bros Coffee                                 : 5.1190667152404785
    Sightglass Coffee                                 : 5.116888046264648

Cosine based recommendation for Tony Cardoza
    Peet's Coffee                                     : 0.6858828067779541
    Dutch Bros Coffee                                 : 0.6723281145095825
    Philz Coffee                                      : 0.6716781258583069
    Cafe Dio                                          : 0.6692067384719849
    Bistro Maxine                                     : 0.6659941077232361
    Roost Cafe                                        : 0.6520237326622009
    Panera Bread                                      : 0.6278735399246216
    Starbucks                                         : 0.6247414946556091
    Starbucks                                         : 0.6240578889846802
    The Buena Vista                                   : 0.6192073822021484

Norm based recommendation for Tony Cardoza
    Peet's Coffee                                     : 0.1839396208524704
    Dutch Bros Coffee                                 : 0.1879374235868454
    Cafe Dio                                          : 0.18902665376663208
    Bistro Maxine                                     : 0.1904258131980896
    Roost Cafe                                        : 0.19175481796264648
    Starbucks                                         : 0.19694381952285767
    Empresso Coffee                                   : 0.20052510499954224
    La Monarca Bakery & Cafe                          : 0.20055969059467316
    Panera Bread                                      : 0.20080392062664032
    Starbucks                                         : 0.20292720198631287

================================================================================

Wenhsiu Hassan      : (24 reviews)
    McDonald's                                        : 3.0 rated at 2017/12/09
    Barnes & Noble                                    : 5.0 rated at 2018/02/10
    Zorro's Cafe & Cantina                            : 5.0 rated at 2018/02/19
    Corner Bakery                                     : 4.0 rated at 2018/07/23
    Starbucks                                         : 4.0 rated at 2018/07/23
    85C Bakery Cafe - Valencia                        : 5.0 rated at 2018/08/04
    Porto's Bakery and Cafe                           : 5.0 rated at 2018/08/23
    Philz Coffee                                      : 5.0 rated at 2018/08/23
    Egg Plantation                                    : 5.0 rated at 2018/08/26
    85°C Bakery Cafe - Pasadena                       : 5.0 rated at 2018/10/14
    Copper Cafe and Bakery                            : 5.0 rated at 2019/02/23
    Copper Cafe and Bakery                            : 5.0 rated at 2019/02/23
    Starbucks                                         : 5.0 rated at 2019/03/27
    McDonald's                                        : 4.0 rated at 2019/03/27
    Starbucks                                         : 5.0 rated at 2019/03/27
    Starbucks                                         : 5.0 rated at 2019/03/30
    Ritual Coffee Roasters                            : 5.0 rated at 2019/04/01
    Madonna Inn                                       : 5.0 rated at 2019/04/02
    The Mad Greek                                     : 4.0 rated at 2019/04/05
    Starbucks                                         : 4.0 rated at 2019/04/05
    Porto's Bakery and Cafe                           : 5.0 rated at 2019/04/16
    Kona Loa Coffee Mission Viejo                     : 5.0 rated at 2019/10/12
    Bon Bon Tea House                                 : 5.0 rated at 2020/02/15
    Cassell's Hamburgers                              : 5.0 rated at 2020/10/26

Rank based recommendation for Wenhsiu Hassan
    Porto's Bakery and Cafe                           : 5.025918483734131
    Dandelion Chocolate                               : 4.985352516174316
    Philz Coffee                                      : 4.954202175140381
    Sidecar Doughnuts & Coffee                        : 4.945174217224121
    Réveille Coffee Co.                               : 4.931402683258057
    République                                        : 4.904410362243652
    The Conservatory for Coffee, Tea & Cocoa          : 4.903187274932861
    Verve Coffee Roasters                             : 4.895986557006836
    Arizmendi Bakery                                  : 4.893642902374268
    Chez Panisse                                      : 4.890368938446045

Cosine based recommendation for Wenhsiu Hassan
    Winchell's Donut House                            : 0.7012297511100769
    Cauldron Ice Cream                                : 0.6914511919021606
    Starbucks                                         : 0.6884936094284058
    Einstein Bros. Bagels                             : 0.6718407273292542
    The Coffee Bean & Tea Leaf                        : 0.6702939867973328
    Mikayla's Cafe                                    : 0.6621062159538269
    The Coffee Bean & Tea Leaf                        : 0.6590048670768738
    McDonald's                                        : 0.6553876399993896
    Starbucks                                         : 0.6521806716918945
    Starbucks                                         : 0.6489641070365906

Norm based recommendation for Wenhsiu Hassan
    Cauldron Ice Cream                                : 0.25334686040878296
    Starbucks                                         : 0.2550813555717468
    The Coffee Bean & Tea Leaf                        : 0.2629299759864807
    Mikayla's Cafe                                    : 0.26521456241607666
    Winchell's Donut House                            : 0.2658213973045349
    Einstein Bros. Bagels                             : 0.271362841129303
    The Coffee Bean & Tea Leaf                        : 0.27284595370292664
    Starbucks                                         : 0.27352938055992126
    Starbucks                                         : 0.278622567653656
    Barnes & Noble                                    : 0.27958935499191284

================================================================================

No description has been provided for this image
No description has been provided for this image
In [128]:
rank_recommender = RankBasedCafeRecommender(final_param_dict, num_recommends, device)
cosine_recommender = CosineBasedCafeRecommender(final_param_dict, num_recommends, device)
norm_recommender = NormBasedCafeRecommender(final_param_dict, num_recommends, device)
recommenders = {"Rank": rank_recommender, "Cosine": cosine_recommender, "Norm": norm_recommender}

recommend_random_sampled_users(recommenders, 10)
J.McGarv            : (27 reviews)
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2011/07/10
    Starbucks                                         : 4.0 rated at 2015/12/26
    Starbucks                                         : 4.0 rated at 2015/12/26
    grounds bakery & cafe                             : 4.0 rated at 2015/12/26
    Starbucks                                         : 4.0 rated at 2015/12/27
    Coffee Cup Cafe                                   : 4.0 rated at 2016/02/28
    McDonald's                                        : 3.0 rated at 2016/09/04
    Starbucks                                         : 4.0 rated at 2016/09/04
    Starbucks                                         : 4.0 rated at 2016/09/04
    Heritage Family Pantry                            : 3.0 rated at 2016/09/04
    McDonald's                                        : 3.0 rated at 2016/09/04
    Starbucks                                         : 4.0 rated at 2016/09/04
    Driftwood Cafe                                    : 4.0 rated at 2016/09/04
    The Coffee Bean & Tea Leaf                        : 3.0 rated at 2016/09/04
    Panera Bread                                      : 3.0 rated at 2016/09/04
    Barnes & Noble                                    : 3.0 rated at 2016/09/04
    Hot Java                                          : 3.0 rated at 2016/09/04
    Starbucks                                         : 4.0 rated at 2018/01/27
    Starbucks                                         : 4.0 rated at 2018/01/27
    McDonald's                                        : 2.0 rated at 2018/11/01
    Starbucks                                         : 4.0 rated at 2018/12/27
    Dunkin'                                           : 5.0 rated at 2019/05/17
    Starbucks                                         : 4.0 rated at 2019/05/26
    McDonald's                                        : 3.0 rated at 2019/06/08
    Starbucks                                         : 4.0 rated at 2019/09/14
    Sizzler - Los Alamitos                            : 4.0 rated at 2019/12/25
    Panera Bread                                      : 3.0 rated at 2019/12/25

Rank based recommendation for J.McGarv
    Dandelion Chocolate                               : 4.445929050445557
    Réveille Coffee Co.                               : 4.429257869720459
    Porto's Bakery and Cafe                           : 4.406644821166992
    Sidecar Doughnuts & Coffee                        : 4.392364025115967
    Pop Pie Co.                                       : 4.390052318572998
    The Pour Choice                                   : 4.389410495758057
    Kuppa Joy Coffee House                            : 4.383652687072754
    Amara Cafe & Restaurant                           : 4.374575138092041
    Dandelion Chocolate 16th Street Factory           : 4.372503757476807
    Destroyer                                         : 4.360875129699707

Cosine based recommendation for J.McGarv
    Cafe 101                                          : 0.83279949426651
    Lucerne Valley Market & Hardware                  : 0.8322334885597229
    McDonald's                                        : 0.8292198777198792
    Panera Bread                                      : 0.829023003578186
    Operacaffe                                        : 0.8029782176017761
    Starbucks                                         : 0.802041232585907
    The Bagel Shack                                   : 0.8003023862838745
    Pop's Country Cafe                                : 0.7958341836929321
    Leap Coffee                                       : 0.7894232273101807
    Starbucks                                         : 0.788210391998291

Norm based recommendation for J.McGarv
    Panera Bread                                      : 0.22679343819618225
    Café Gratitude                                    : 0.23104046285152435
    Starbucks                                         : 0.23514476418495178
    Red Giant Coffee                                  : 0.24344795942306519
    McDonald's                                        : 0.24399253726005554
    McDonald's                                        : 0.2476385533809662
    McDonald's                                        : 0.24945960938930511
    McDonald's                                        : 0.25048959255218506
    Cafe 101                                          : 0.25073716044425964
    Peanuts Deluxe Cafe                               : 0.2517056465148926

================================================================================

Miguel Hernandez    : (25 reviews)
    Starbucks                                         : 1.0 rated at 2019/02/16
    McDonald's                                        : 4.0 rated at 2019/03/17
    McDonald's                                        : 4.0 rated at 2019/03/17
    Starbucks                                         : 4.0 rated at 2019/03/17
    Starbucks                                         : 4.0 rated at 2019/03/22
    Fantastic Cafe                                    : 4.0 rated at 2019/05/04
    Panera Bread                                      : 4.0 rated at 2019/06/04
    Porto's Bakery and Cafe                           : 4.0 rated at 2019/06/04
    85°C Bakery Cafe - South Gate                     : 5.0 rated at 2019/07/19
    Sizzler                                           : 4.0 rated at 2019/08/11
    Tierra Mia Coffee                                 : 5.0 rated at 2019/09/12
    Tamarindo Latin Kitchen & Bar                     : 4.0 rated at 2019/10/12
    Starbucks                                         : 5.0 rated at 2019/12/12
    Starbucks                                         : 5.0 rated at 2019/12/12
    Poached Neighborhood Kitchen                      : 4.0 rated at 2020/02/08
    Sequoia Coffee Co.                                : 5.0 rated at 2020/08/07
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2020/10/18
    Tierra Mia Coffee                                 : 5.0 rated at 2020/12/19
    Trejo’s Coffee & Donuts                           : 5.0 rated at 2021/01/21
    Tierra Mia Coffee Drive Thru                      : 5.0 rated at 2021/02/07
    Noah's NY Bagels                                  : 5.0 rated at 2021/02/10
    Tierra Mia Coffee                                 : 5.0 rated at 2021/02/19
    Starbucks                                         : 5.0 rated at 2021/02/24
    Starbucks                                         : 5.0 rated at 2021/03/23
    Bionicos Plus                                     : 5.0 rated at 2021/05/18

Rank based recommendation for Miguel Hernandez
    Bird Rock Coffee Roasters                         : 5.006161212921143
    Bottega Italiana                                  : 4.9265971183776855
    The Coffee Bean & Tea Leaf                        : 4.923327445983887
    Dandelion Chocolate                               : 4.917479038238525
    Backyard Brew                                     : 4.9102559089660645
    Ritual Coffee Roasters                            : 4.907886028289795
    Porto's Bakery and Cafe                           : 4.9037699699401855
    The Conservatory for Coffee, Tea & Cocoa          : 4.90228271484375
    Dark Horse Coffee Roasters Truckee                : 4.898399353027344
    Black Velvet Coffee | Espresso Bar                : 4.880099296569824

Cosine based recommendation for Miguel Hernandez
    Local Fixture                                     : 0.9440152049064636
    McDonald's                                        : 0.9278134107589722
    Starbucks                                         : 0.9082697033882141
    McDonald's                                        : 0.905393660068512
    Starbucks                                         : 0.9051560759544373
    McDonald's                                        : 0.9008206725120544
    Panera Bread                                      : 0.8977365493774414
    Rick & Ann's Restaurant                           : 0.8965674638748169
    McDonald's                                        : 0.8932666182518005
    Blu Jam Cafe                                      : 0.892816960811615

Norm based recommendation for Miguel Hernandez
    McDonald's                                        : 0.42643094062805176
    McDonald's                                        : 0.4319632351398468
    McDonald's                                        : 0.44248729944229126
    McDonald's                                        : 0.44472986459732056
    McDonald's                                        : 0.4451439082622528
    McDonald's                                        : 0.4488200843334198
    McDonald's                                        : 0.4612753391265869
    McDonald's                                        : 0.461304634809494
    McDonald's                                        : 0.47278517484664917
    McDonald's                                        : 0.47511959075927734

================================================================================

Marci eg9           : (20 reviews)
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2018/08/25
    McDonald's                                        : 3.0 rated at 2018/08/30
    The Stonehaus                                     : 4.0 rated at 2018/09/04
    McDonald's                                        : 3.0 rated at 2018/09/05
    McDonald's                                        : 5.0 rated at 2018/10/07
    McDonald's                                        : 2.0 rated at 2018/10/07
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2018/10/07
    Starbucks                                         : 5.0 rated at 2018/10/07
    McDonald's                                        : 3.0 rated at 2018/10/09
    McDonald's                                        : 2.0 rated at 2019/06/16
    McDonald's                                        : 3.0 rated at 2019/09/06
    McDonald's                                        : 3.0 rated at 2019/09/06
    McDonald's                                        : 3.0 rated at 2019/09/06
    McDonald's                                        : 3.0 rated at 2020/06/29
    The Coffee Bean & Tea Leaf                        : 5.0 rated at 2020/06/29
    Cafe Demitasse                                    : 4.0 rated at 2020/07/14
    McDonald's                                        : 3.0 rated at 2020/07/18
    Starbucks                                         : 4.0 rated at 2020/08/07
    Crave Cafe                                        : 4.0 rated at 2020/08/18
    Starbucks                                         : 3.0 rated at 2021/02/23

Rank based recommendation for Marci eg9
    Dandelion Chocolate                               : 4.453902244567871
    Sidecar Doughnuts & Coffee                        : 4.445859909057617
    Caffe Caldo                                       : 4.431463718414307
    Réveille Coffee Co.                               : 4.420126914978027
    Porto's Bakery and Cafe                           : 4.413804531097412
    Chez Panisse                                      : 4.399682998657227
    Wide Eyes Open Palms                              : 4.398289203643799
    Coffeebar Truckee                                 : 4.393702983856201
    Amara Cafe & Restaurant                           : 4.391049861907959
    Kuppa Joy Coffee House                            : 4.381416320800781

Cosine based recommendation for Marci eg9
    Chile Lindo Empanadas                             : 0.8863617777824402
    Starbucks                                         : 0.8668469786643982
    Paris Baguette                                    : 0.8583994507789612
    Jack Ranch Cafe                                   : 0.855861485004425
    Yum Yum Donuts                                    : 0.8499815464019775
    Barnes & Noble                                    : 0.8396624326705933
    Groundwork Coffee Co.                             : 0.8373353481292725
    Dos Palmas Cafe                                   : 0.8291140794754028
    Starbucks                                         : 0.828869104385376
    Patys Restaurant                                  : 0.827272891998291

Norm based recommendation for Marci eg9
    Yum Yum Donuts                                    : 0.24048852920532227
    Dutch Bros Coffee                                 : 0.24425646662712097
    Barnes & Noble                                    : 0.2474110722541809
    Patys Restaurant                                  : 0.2476789951324463
    McDonald's                                        : 0.25076815485954285
    Groundwork Coffee Co.                             : 0.2531602382659912
    Winchell's Donut House                            : 0.2552341818809509
    Tierra Mia Coffee                                 : 0.2552584707736969
    Quickly Cafe                                      : 0.2558749318122864
    Tierra Mia Coffee                                 : 0.25794342160224915

================================================================================

Boris Goncharov     : (21 reviews)
    Bicycle Coffee LA Cafe + Roastery                 : 4.0 rated at 2018/10/21
    The Coffee Bean & Tea Leaf                        : 4.0 rated at 2018/10/21
    Blue Bottle Coffee                                : 5.0 rated at 2018/12/23
    Alfred Coffee Melrose Place                       : 5.0 rated at 2019/01/27
    Starbucks                                         : 5.0 rated at 2019/02/06
    Starbucks                                         : 5.0 rated at 2019/04/02
    Jones Coffee Roasters                             : 5.0 rated at 2019/05/04
    Tilt Coffee Bar                                   : 4.0 rated at 2019/05/13
    Black Elephant Coffee                             : 4.0 rated at 2019/05/21
    Coffee Commissary                                 : 5.0 rated at 2019/06/07
    Cafe de Leche                                     : 4.0 rated at 2019/06/10
    Cyber City Esports Center | Little Tokyo          : 5.0 rated at 2019/07/19
    Starbucks                                         : 4.0 rated at 2019/09/26
    Espresso Profeta                                  : 5.0 rated at 2019/09/28
    Superba Snacks + Coffee                           : 5.0 rated at 2019/11/29
    Black Dog Coffee                                  : 3.0 rated at 2020/02/03
    Cafe Demitasse                                    : 5.0 rated at 2020/02/29
    Starbucks                                         : 5.0 rated at 2021/02/08
    The Highland Cafe                                 : 3.0 rated at 2021/02/08
    Verve Coffee Roasters                             : 5.0 rated at 2021/02/08
    Crystal Lake Cafe                                 : 5.0 rated at 2021/02/08

Rank based recommendation for Boris Goncharov
    Verve Coffee Roasters                             : 4.7691121101379395
    Barnes & Noble                                    : 4.7616071701049805
    Philz Coffee                                      : 4.747860431671143
    Philz Coffee                                      : 4.745265483856201
    Barnes & Noble                                    : 4.739681243896484
    Barnes & Noble                                    : 4.72083854675293
    Starbucks                                         : 4.718288898468018
    Philz Coffee                                      : 4.714847564697266
    Verve Coffee Roasters                             : 4.7062907218933105
    Equator Coffees                                   : 4.705522537231445

Cosine based recommendation for Boris Goncharov
    Starbucks                                         : 0.9608664512634277
    Starbucks                                         : 0.9523919820785522
    Starbucks                                         : 0.9415614008903503
    Fig Tree                                          : 0.9375962615013123
    Mickey's Yogurt Hemet, LLC                        : 0.9264169931411743
    Starbucks                                         : 0.9185886383056641
    Starbucks                                         : 0.9170356392860413
    Starbucks                                         : 0.9163199663162231
    Starbucks                                         : 0.9121597409248352
    Starbucks                                         : 0.9099191427230835

Norm based recommendation for Boris Goncharov
    Starbucks                                         : 0.24615934491157532
    Starbucks                                         : 0.2540782690048218
    Starbucks                                         : 0.26097145676612854
    Starbucks                                         : 0.26730629801750183
    Starbucks                                         : 0.27516981959342957
    Starbucks                                         : 0.27738359570503235
    Starbucks                                         : 0.280021995306015
    Big Sur Bakery                                    : 0.28344976902008057
    Starbucks                                         : 0.2835765779018402
    McDonald's                                        : 0.2854447066783905

================================================================================

Sean O’Connor       : (20 reviews)
    The Creamery                                      : 2.0 rated at 2012/08/06
    The Creamery                                      : 2.0 rated at 2012/08/06
    Cafe Prague                                       : 3.0 rated at 2012/10/03
    The Plant Café Organic                            : 3.0 rated at 2012/12/22
    Velo Rouge Cafe                                   : 4.0 rated at 2013/03/04
    Tierra Mia Coffee                                 : 4.0 rated at 2013/10/16
    Gaslamp Cafe                                      : 4.0 rated at 2014/07/08
    The Sacred Grounds Cafe                           : 1.0 rated at 2015/07/15
    Antigua Coffee Shop                               : 4.0 rated at 2016/03/23
    Flywheel Coffee Roasters                          : 4.0 rated at 2016/04/10
    Flywheel Coffee Roasters                          : 4.0 rated at 2016/04/10
    Matching Half                                     : 4.0 rated at 2016/07/17
    Java Beach At The Zoo                             : 4.0 rated at 2016/11/24
    The Mill                                          : 5.0 rated at 2016/11/24
    South Beach Cafe                                  : 3.0 rated at 2016/11/24
    Java Beach Cafe                                   : 4.0 rated at 2016/11/24
    Stable Cafe                                       : 5.0 rated at 2018/04/02
    Hollow                                            : 4.0 rated at 2019/11/15
    Snowbird Coffee                                   : 5.0 rated at 2020/03/07
    Manitas Cafe                                      : 4.0 rated at 2020/05/27

Rank based recommendation for Sean O’Connor
    Philz Coffee                                      : 4.174536228179932
    Barnes & Noble                                    : 4.168290138244629
    Blue Bottle Coffee                                : 4.144691467285156
    Verve Coffee Roasters                             : 4.137280464172363
    Barnes & Noble                                    : 4.131244659423828
    Philz Coffee                                      : 4.1218581199646
    Réveille Coffee Co.                               : 4.118056297302246
    Verve Coffee Roasters                             : 4.10911750793457
    Philz Coffee                                      : 4.088583469390869
    Ettore's Bakery & Cafe                            : 4.085151672363281

Cosine based recommendation for Sean O’Connor
    Starbucks                                         : 0.916006326675415
    Starbucks                                         : 0.9148972630500793
    Gio's Bakery & Cafe                               : 0.9122514724731445
    Starbucks                                         : 0.9044554829597473
    Dutch Bros Coffee                                 : 0.8933252692222595
    Starbucks                                         : 0.8848211169242859
    Starbucks                                         : 0.8762492537498474
    Starbucks                                         : 0.876115620136261
    Starbucks                                         : 0.8740797638893127
    Starbucks                                         : 0.8724584579467773

Norm based recommendation for Sean O’Connor
    Starbucks                                         : 0.3406066596508026
    McDonald's                                        : 0.35045233368873596
    Starbucks                                         : 0.3624860942363739
    Starbucks                                         : 0.36517176032066345
    Starbucks                                         : 0.3727642893791199
    Peet's Coffee                                     : 0.3790369927883148
    Yum Yum Donuts                                    : 0.3799285888671875
    Starbucks                                         : 0.3816395401954651
    Starbucks                                         : 0.3824274241924286
    McDonald's                                        : 0.3835294246673584

================================================================================

Daniel Morgan       : (25 reviews)
    McDonald's                                        : 5.0 rated at 2017/07/13
    Starbucks Reserve                                 : 5.0 rated at 2018/02/21
    McDonald's                                        : 5.0 rated at 2018/02/27
    Starbucks                                         : 5.0 rated at 2018/02/27
    Heidi's Pies Restaurant                           : 5.0 rated at 2018/03/10
    Philz Coffee                                      : 5.0 rated at 2018/03/11
    McDonald's                                        : 5.0 rated at 2018/04/25
    Starbucks                                         : 5.0 rated at 2018/06/27
    Starbucks                                         : 5.0 rated at 2018/06/27
    McDonald's                                        : 5.0 rated at 2018/06/27
    Starbucks                                         : 5.0 rated at 2018/06/27
    Starbucks                                         : 5.0 rated at 2018/06/27
    McDonald's                                        : 5.0 rated at 2018/06/27
    Sizzler                                           : 5.0 rated at 2018/06/27
    McDonald's                                        : 5.0 rated at 2018/06/27
    Barnes & Noble                                    : 5.0 rated at 2019/01/21
    La Stazione Coffee & Wine Bar                     : 5.0 rated at 2019/02/08
    Starbucks                                         : 5.0 rated at 2019/02/08
    Starbucks                                         : 5.0 rated at 2019/02/08
    Philz Coffee                                      : 3.0 rated at 2019/12/16
    McDonald's                                        : 5.0 rated at 2019/12/16
    Barnes & Noble                                    : 5.0 rated at 2019/12/16
    Dunkin'                                           : 5.0 rated at 2019/12/16
    Starbucks                                         : 5.0 rated at 2019/12/16
    McDonald's                                        : 5.0 rated at 2020/02/06

Rank based recommendation for Daniel Morgan
    Bird Rock Coffee Roasters                         : 5.437586784362793
    Dutch Bros Coffee                                 : 5.39506196975708
    The Coffee Bean & Tea Leaf                        : 5.3903489112854
    Starbucks                                         : 5.381923198699951
    Krispy Kreme                                      : 5.3708391189575195
    Peet's Coffee                                     : 5.366911888122559
    Peet's Coffee                                     : 5.340319633483887
    The Coffee Bean & Tea Leaf                        : 5.331013202667236
    Peet's Coffee                                     : 5.33083438873291
    Yum Yum Donuts                                    : 5.3259100914001465

Cosine based recommendation for Daniel Morgan
    McDonald's                                        : 0.9756391048431396
    McDonald's                                        : 0.9599661231040955
    Starbucks                                         : 0.9530150890350342
    McDonald's                                        : 0.9523974657058716
    McDonald's                                        : 0.9522244930267334
    The Riverside Airport Cafe                        : 0.9495770931243896
    Flakey Cream Do-Nuts & Coffee Shop                : 0.9495470523834229
    Dutch Bros Coffee                                 : 0.9461210370063782
    McDonald's                                        : 0.9458843469619751
    Starbucks                                         : 0.9446702003479004

Norm based recommendation for Daniel Morgan
    Starbucks                                         : 0.3179968297481537
    McDonald's                                        : 0.3241705894470215
    McDonald's                                        : 0.3414623439311981
    McDonald's                                        : 0.3420897424221039
    McDonald's                                        : 0.34683549404144287
    Square Bar Cafe                                   : 0.347204327583313
    McDonald's                                        : 0.35923945903778076
    Panera Bread                                      : 0.36604663729667664
    McDonald's                                        : 0.3708284795284271
    McDonald's                                        : 0.37122559547424316

================================================================================

bruce paulson       : (20 reviews)
    Lou's Coffee Shop                                 : 3.0 rated at 2017/07/06
    Brown Chicken Brown Cow                           : 3.0 rated at 2017/07/06
    McDonald's                                        : 5.0 rated at 2017/08/30
    Starbucks                                         : 4.0 rated at 2017/11/23
    Starbucks                                         : 3.0 rated at 2017/11/29
    McDonald's                                        : 3.0 rated at 2017/12/15
    McDonald's                                        : 3.0 rated at 2018/01/12
    McDonald's                                        : 4.0 rated at 2018/02/13
    Starbucks                                         : 3.0 rated at 2018/02/13
    McDonald's                                        : 3.0 rated at 2018/04/18
    McDonald's                                        : 3.0 rated at 2018/07/06
    Starbucks                                         : 4.0 rated at 2018/07/07
    Paul's Coffee Shop                                : 4.0 rated at 2018/07/29
    McDonald's                                        : 4.0 rated at 2018/07/30
    McDonald's                                        : 4.0 rated at 2018/07/30
    Perko's Café                                      : 3.0 rated at 2018/11/25
    Stanislaus County Fair                            : 5.0 rated at 2019/01/31
    McDonald's                                        : 4.0 rated at 2019/11/04
    McDonald's                                        : 5.0 rated at 2020/12/16
    Starbucks                                         : 4.0 rated at 2020/12/19

Rank based recommendation for bruce paulson
    Porto's Bakery and Cafe                           : 4.439476490020752
    Dandelion Chocolate                               : 4.409303665161133
    Bird Rock Coffee Roasters                         : 4.405673027038574
    The Conservatory for Coffee, Tea & Cocoa          : 4.363326072692871
    Ritual Coffee Roasters                            : 4.360138416290283
    Dark Horse Coffee Roasters Truckee                : 4.356546878814697
    Backyard Brew                                     : 4.352289199829102
    Bottega Italiana                                  : 4.350727081298828
    Arizmendi Bakery                                  : 4.3388166427612305
    The Coffee Bean & Tea Leaf                        : 4.338538646697998

Cosine based recommendation for bruce paulson
    Local Fixture                                     : 0.8987312316894531
    McDonald's                                        : 0.8985915780067444
    McDonald's                                        : 0.8938546776771545
    McDonald's                                        : 0.8866934776306152
    Panera Bread                                      : 0.8814514875411987
    Peanuts Deluxe Cafe                               : 0.8754969835281372
    McDonald's                                        : 0.8734794855117798
    Sizzler                                           : 0.8666186332702637
    McDonald's                                        : 0.864490270614624
    McDonald's                                        : 0.8636798858642578

Norm based recommendation for bruce paulson
    McDonald's                                        : 0.26800641417503357
    McDonald's                                        : 0.31819337606430054
    McDonald's                                        : 0.32026055455207825
    McDonald's                                        : 0.320521742105484
    McDonald's                                        : 0.3237784206867218
    McDonald's                                        : 0.3278183937072754
    McDonald's                                        : 0.3369661271572113
    McDonald's                                        : 0.3411518633365631
    McDonald's                                        : 0.34319788217544556
    McDonald's                                        : 0.34881553053855896

================================================================================

larry mucho         : (46 reviews)
    Raley's                                           : 3.0 rated at 2017/05/04
    Picasso's Gourmet Deli                            : 4.0 rated at 2017/05/04
    Limelight Bar & Café                              : 3.0 rated at 2017/06/16
    Chocolate Fish Coffee Roasters                    : 4.0 rated at 2017/10/05
    Pachamama Coffee Bar                              : 4.0 rated at 2017/10/19
    Chocolate Fish Coffee                             : 4.0 rated at 2017/11/08
    McDonald's                                        : 2.0 rated at 2017/11/16
    McDonald's                                        : 1.0 rated at 2017/11/27
    Town & Country Cafe                               : 4.0 rated at 2017/12/09
    Noah's NY Bagels                                  : 4.0 rated at 2018/02/26
    Temple Coffee Roasters                            : 3.0 rated at 2018/05/24
    Panera Bread                                      : 2.0 rated at 2018/07/13
    Starbucks                                         : 4.0 rated at 2018/07/31
    Revive Coffee & Wine                              : 4.0 rated at 2018/08/09
    Old Soul @ 40 Acres                               : 4.0 rated at 2018/08/09
    Shine Kava                                        : 4.0 rated at 2018/09/12
    McDonald's                                        : 3.0 rated at 2018/10/24
    Starbucks                                         : 3.0 rated at 2018/12/21
    McDonald's                                        : 2.0 rated at 2018/12/31
    McDonald's                                        : 3.0 rated at 2019/01/03
    Village Baking Company & Cafe                     : 4.0 rated at 2019/01/14
    Pegasus Bakery & Café                             : 5.0 rated at 2019/02/05
    Starbucks                                         : 4.0 rated at 2019/02/19
    Ettore's Bakery & Cafe                            : 4.0 rated at 2019/03/14
    Badfish Coffee & Tea                              : 4.0 rated at 2019/03/25
    Jack's Urban Eats                                 : 4.0 rated at 2019/04/04
    World Coffee House                                : 3.0 rated at 2019/04/30
    Starbucks                                         : 4.0 rated at 2019/06/11
    Fair Oaks Coffee House & Deli                     : 4.0 rated at 2019/07/25
    Sun City Lincoln Hills                            : 4.0 rated at 2019/07/25
    Emily's Good Things To Eat                        : 4.0 rated at 2019/08/29
    Karen's Bakery                                    : 4.0 rated at 2019/09/25
    McDonald's                                        : 3.0 rated at 2019/10/14
    Noah's NY Bagels                                  : 4.0 rated at 2019/11/26
    McDonald's                                        : 4.0 rated at 2019/12/20
    Shift Coffee House                                : 4.0 rated at 2020/01/09
    Old Soul Co.                                      : 4.0 rated at 2020/01/13
    McDonald's                                        : 3.0 rated at 2020/01/24
    Dos Coyotes Border Cafe                           : 4.0 rated at 2020/01/25
    Noah's NY Bagels                                  : 4.0 rated at 2020/08/07
    Raley's                                           : 4.0 rated at 2020/09/02
    Spinners                                          : 5.0 rated at 2020/09/15
    Dunkin'                                           : 4.0 rated at 2020/10/26
    Starbucks                                         : 4.0 rated at 2021/01/21
    Greek Food Imports - An Original Greek Market-Cafe: 5.0 rated at 2021/03/17
    La Bou Bakery & Café                              : 4.0 rated at 2021/05/04

Rank based recommendation for larry mucho
    Dandelion Chocolate                               : 4.566472053527832
    Porto's Bakery and Cafe                           : 4.555684566497803
    Caffe Caldo                                       : 4.555110454559326
    Chez Panisse                                      : 4.528485298156738
    République                                        : 4.517945766448975
    Sidecar Doughnuts & Coffee                        : 4.492447853088379
    Wide Eyes Open Palms                              : 4.473511219024658
    Carmela Ice Cream                                 : 4.461442947387695
    Dune Coffee Roasters                              : 4.453752517700195
    Dandelion Chocolate 16th Street Factory           : 4.4409918785095215

Cosine based recommendation for larry mucho
    Pop's Cafe                                        : 0.9469003081321716
    Pachamama Coffee                                  : 0.9464800953865051
    Nékter Juice Bar                                  : 0.9406734704971313
    Laguna Coffee Company                             : 0.9392879605293274
    Starbucks                                         : 0.927619993686676
    Dutch Bros Coffee                                 : 0.9256097078323364
    Village Inn Café                                  : 0.925497829914093
    Green Bakery & Cafe                               : 0.921710193157196
    The Coffee Bean & Tea Leaf                        : 0.9136456847190857
    Flower Farm Cafe                                  : 0.9130439162254333

Norm based recommendation for larry mucho
    Swami's Cafe Oceanside                            : 0.3541295826435089
    Dunkin'                                           : 0.37829098105430603
    Blue Bottle Coffee                                : 0.38683801889419556
    McDonald's                                        : 0.3895183801651001
    Starbucks                                         : 0.3962879776954651
    The Coffee Bean & Tea Leaf                        : 0.39791616797447205
    Dutch Bros Coffee                                 : 0.4032057523727417
    Afters Ice Cream                                  : 0.4080636501312256
    Philz Coffee                                      : 0.408480167388916
    Starbucks                                         : 0.41748952865600586

================================================================================

Tony Cardoza        : (29 reviews)
    Olive Pit                                         : 5.0 rated at 2018/02/23
    Olive Pit                                         : 5.0 rated at 2018/02/23
    McDonald's                                        : 5.0 rated at 2018/03/10
    Starbucks                                         : 5.0 rated at 2018/03/23
    McDonald's                                        : 5.0 rated at 2018/04/05
    Panera Bread                                      : 3.0 rated at 2018/04/09
    Panera Bread                                      : 3.0 rated at 2018/04/10
    Starbucks                                         : 5.0 rated at 2018/04/29
    Panera Bread                                      : 5.0 rated at 2018/04/29
    McDonald's                                        : 5.0 rated at 2018/04/30
    McDonald's                                        : 5.0 rated at 2018/04/30
    Sizzler - Santa Clara                             : 5.0 rated at 2018/05/19
    Half Moon Bay Coffee Co                           : 5.0 rated at 2018/05/20
    Cunha's Country Store                             : 5.0 rated at 2018/05/20
    McDonald's                                        : 5.0 rated at 2018/05/27
    Starbucks                                         : 5.0 rated at 2018/05/29
    Starbucks                                         : 5.0 rated at 2018/05/29
    Starbucks                                         : 5.0 rated at 2018/05/30
    Starbucks                                         : 5.0 rated at 2018/06/08
    Starbucks                                         : 5.0 rated at 2018/07/23
    Starbucks                                         : 5.0 rated at 2018/07/28
    Starbucks                                         : 5.0 rated at 2018/08/02
    Sizzler                                           : 5.0 rated at 2018/08/29
    Barnes & Noble                                    : 5.0 rated at 2018/09/10
    Starbucks                                         : 5.0 rated at 2018/10/06
    Panera Bread                                      : 5.0 rated at 2018/10/31
    Dutch Bros Coffee                                 : 5.0 rated at 2018/11/29
    McDonald's                                        : 1.0 rated at 2019/03/17
    Starbucks                                         : 5.0 rated at 2019/06/21

Rank based recommendation for Tony Cardoza
    Réveille Coffee Co.                               : 5.016845703125
    Philz Coffee                                      : 5.012296676635742
    Temple Coffee Roasters                            : 5.011783123016357
    Pop Pie Co.                                       : 4.996665000915527
    Sidecar Doughnuts & Coffee                        : 4.983214378356934
    Philz Coffee                                      : 4.983132362365723
    Porto's Bakery and Cafe                           : 4.979844570159912
    Amara Cafe & Restaurant                           : 4.977750778198242
    Coffeebar Truckee                                 : 4.966866493225098
    Hook Fish Co                                      : 4.964085102081299

Cosine based recommendation for Tony Cardoza
    Better Buzz Coffee Pacific Beach West             : 0.9113208651542664
    Bruegger's Bagels                                 : 0.9067589044570923
    Young Hickory North Park                          : 0.8987672924995422
    Starbucks                                         : 0.8874898552894592
    Starbucks                                         : 0.8845992684364319
    Country Cafe                                      : 0.8793437480926514
    85°C Bakery Cafe - Newark                         : 0.8756945729255676
    Krispy Kreme                                      : 0.8753010630607605
    Starbucks                                         : 0.8700447082519531
    McDonald's                                        : 0.8667906522750854

Norm based recommendation for Tony Cardoza
    Better Buzz Coffee Pacific Beach West             : 0.28411489725112915
    Starbucks                                         : 0.30116352438926697
    Brick & Bell Cafe - La Jolla Shores               : 0.3205745816230774
    McDonald's                                        : 0.32858720421791077
    McDonald's                                        : 0.3287613093852997
    Starbucks                                         : 0.3323356509208679
    Thai Cafe                                         : 0.34010374546051025
    The Little Swiss Café                             : 0.34250500798225403
    Isabelle Briens French Pastry Cafe                : 0.3437526524066925
    Amandine Patisserie Cafe                          : 0.34671279788017273

================================================================================

Wenhsiu Hassan      : (24 reviews)
    McDonald's                                        : 3.0 rated at 2017/12/09
    Barnes & Noble                                    : 5.0 rated at 2018/02/10
    Zorro's Cafe & Cantina                            : 5.0 rated at 2018/02/19
    Corner Bakery                                     : 4.0 rated at 2018/07/23
    Starbucks                                         : 4.0 rated at 2018/07/23
    85C Bakery Cafe - Valencia                        : 5.0 rated at 2018/08/04
    Porto's Bakery and Cafe                           : 5.0 rated at 2018/08/23
    Philz Coffee                                      : 5.0 rated at 2018/08/23
    Egg Plantation                                    : 5.0 rated at 2018/08/26
    85°C Bakery Cafe - Pasadena                       : 5.0 rated at 2018/10/14
    Copper Cafe and Bakery                            : 5.0 rated at 2019/02/23
    Copper Cafe and Bakery                            : 5.0 rated at 2019/02/23
    Starbucks                                         : 5.0 rated at 2019/03/27
    McDonald's                                        : 4.0 rated at 2019/03/27
    Starbucks                                         : 5.0 rated at 2019/03/27
    Starbucks                                         : 5.0 rated at 2019/03/30
    Ritual Coffee Roasters                            : 5.0 rated at 2019/04/01
    Madonna Inn                                       : 5.0 rated at 2019/04/02
    The Mad Greek                                     : 4.0 rated at 2019/04/05
    Starbucks                                         : 4.0 rated at 2019/04/05
    Porto's Bakery and Cafe                           : 5.0 rated at 2019/04/16
    Kona Loa Coffee Mission Viejo                     : 5.0 rated at 2019/10/12
    Bon Bon Tea House                                 : 5.0 rated at 2020/02/15
    Cassell's Hamburgers                              : 5.0 rated at 2020/10/26

Rank based recommendation for Wenhsiu Hassan
    Réveille Coffee Co.                               : 5.054797172546387
    Temple Coffee Roasters                            : 5.039400100708008
    Sidecar Doughnuts & Coffee                        : 5.030787944793701
    Amara Cafe & Restaurant                           : 5.018215656280518
    Philz Coffee                                      : 5.008330345153809
    Stumptown Coffee Roasters                         : 5.005703449249268
    Cat & Cloud Coffee                                : 4.991421222686768
    Coffee Shop                                       : 4.987072944641113
    Hook Fish Co                                      : 4.97988224029541
    Coffeebar Truckee                                 : 4.978761196136475

Cosine based recommendation for Wenhsiu Hassan
    Rad Coffee                                        : 0.8794376850128174
    Public Square Coffee House                        : 0.8680062890052795
    Starbucks                                         : 0.8679584860801697
    The Tree House Cafe                               : 0.830746591091156
    Sunrise Cafe                                      : 0.8244848251342773
    Goldfish Point Cafe                               : 0.8231930732727051
    Component Coffee Lab                              : 0.8191201090812683
    Yum Yum Donuts                                    : 0.809773862361908
    Starbucks                                         : 0.8090744614601135
    Java Station                                      : 0.8086151480674744

Norm based recommendation for Wenhsiu Hassan
    Starbucks                                         : 0.3680170178413391
    Daily News Cafe                                   : 0.37921378016471863
    Koffi Central Palm Springs                        : 0.38404592871665955
    Starbucks                                         : 0.38562387228012085
    Rad Coffee                                        : 0.38882848620414734
    Cafe Gratitude San Diego                          : 0.3889198303222656
    Sizzler - Los Alamitos                            : 0.3911900818347931
    Starbucks                                         : 0.40470433235168457
    Starbucks                                         : 0.4090668261051178
    Starbucks                                         : 0.4129127562046051

================================================================================

Latent Analysis¶

First we need to map each latent to features of cafes such as if it is chain, its price level, and its average rating.

In [129]:
def get_cafe2index(cafes):
    unique_gmap_ids = np.sort(np.unique(cafes["gmap_id"]))
    cafe2index = {gmap_id: index for index, gmap_id in enumerate(unique_gmap_ids)}
    return cafe2index

def get_cafe_latents(name):
    model_path = f"./models/{name}.pt"
    model = torch.load(model_path, weights_only=False)
    cafe_latents = model.latents["cafe"]

    return cafe_latents

def find_famous_chains(name):
    if "starbucks" in name.lower():
        return "Starbucks"

    elif "mcdonald" in name.lower():
        return "McDonald's"

    else:
        return "Others"

def price_to_num(p):
    if pd.isna(p):
        return np.nan

    p = str(p).strip()

    if p == "" or p.lower() == "none":
        return np.nan

    return p.count("$") or np.nan

def get_category_mappings(cafes, category):
    if category == "chain":
        cafes = pd.read_csv("./datasets/processed/cafes.csv")
        chains = get_chains_dict(cafes)

        chain_map = {0: "No Chain", 1: "Chain", 2: "Chain"}
        cafe2category = {gmap_id: chain_map[chains[name]] for gmap_id, name in cafes[["gmap_id", "name"]].values}

    elif category == "price":
        cafes["price_num"] = cafes["price"].apply(price_to_num)

        cafe2category = {gmap_id: str(price) for gmap_id, price in cafes[["gmap_id", "price_num"]].values}

    elif category == "famous_chain":
        cafes = pd.read_csv("./datasets/processed/cafes.csv")
        chains = get_chains_dict(cafes)

        cafe2category = {gmap_id: find_famous_chains(name) for gmap_id, name in cafes[["gmap_id", "name"]].values}

    elif category == "avg_rating":
        cafe2category = {gmap_id: avg_rating for gmap_id, avg_rating in cafes[["gmap_id", "avg_rating"]].values}

    elif category == "county":
        cafe2category = {gmap_id: get_county(latitude, longitude) for (gmap_id, latitude, longitude) in cafes[["gmap_id", "latitude", "longitude"]].values}
    else:
        raise NotImplementedError

    return cafe2category
In [130]:
def plot_elbow(components):
    stds = np.std(components, axis=0)

    df = pd.DataFrame({"std": stds, "component": np.arange(stds.shape[0])})

    plt.figure(figsize=(10, 5))

    sns.lineplot(df, x="component", y="std")
    sns.scatterplot(df, x="component", y="std")

    plt.title("Elbow plot for PCA")
    plt.xlabel("Components sorted by their standard deviations")
    plt.ylabel("Standard Deviations")
    plt.tight_layout()
    plt.show()

def plot_top_two_componets(df, category):
    plt.figure(figsize=(20, 10))

    if category == "avg_rating":
        ax = sns.scatterplot(df, x="component_0", y="component_1", hue="category", palette="inferno", alpha=0.4)
        norm = plt.Normalize(2, 5)
        plt.colorbar(
            plt.cm.ScalarMappable(cmap="inferno", norm=norm),
            ax=plt.gca()
        )
        ax.get_legend().remove()

    elif category == "chain" or category == "famous_chain":
        sns.scatterplot(df, x="component_0", y="component_1", style="category", hue="category", alpha=0.4)

    else:
        sns.scatterplot(df, x="component_0", y="component_1", hue="category", alpha=0.4)

    plt.xlabel("PC1")
    plt.ylabel("PC2")
    plt.title(f"Cafe latent space (PCA) colored by {category}")
    plt.tight_layout()
    plt.show()

def plot_pairs(df, category, n_components):
    variables = [f"component_{i}" for i in range(n_components)][:5]

    plt.figure(figsize=(50, 50))

    if category == "avg_rating":
        sns.pairplot(df, vars=variables, hue="category", palette="inferno", plot_kws={
            "alpha": 0.4, "s": 10, "palette": "inferno"})

    else:
        sns.pairplot(df, vars=variables, hue="category", plot_kws={"alpha": 0.4, "s": 10})

    plt.suptitle(f"Pair plots of components colored by {category}")
    plt.show()
In [131]:
def analyze_pca(param_dict, category):
    name, _ = get_names(param_dict)
    print(f"Analyzing PCA for {category}")
    cafes = pd.read_csv("./datasets/processed/cafes.csv")
    cafe2index = get_cafe2index(cafes)

    latents = get_cafe_latents(name)
    cafe2latent = {gmap_id: latents[index].detach().numpy() for gmap_id, index in cafe2index.items()}

    cafe2category = get_category_mappings(cafes, category)

    latents, categories = [], []

    for gmap_id in cafes["gmap_id"].values:
        latents.append(cafe2latent[gmap_id])
        categories.append(cafe2category[gmap_id])

    n_components = 10

    pca = PCA(n_components=n_components, random_state=0)
    components = pca.fit_transform(latents)

    data_dict = {f"component_{i}": components[:, i] for i in range(n_components)}
    data_dict["category"] = categories
    df = pd.DataFrame(data_dict)

    sns.set_theme(style="whitegrid", palette="tab10")

    if category == "avg_rating":
        plot_elbow(components)
    plot_top_two_componets(df, category)
    plot_pairs(df, category, n_components)
In [132]:
analyze_pca(baseline_params[1], "famous_chain")
Analyzing PCA for famous_chain
No description has been provided for this image
<Figure size 5000x5000 with 0 Axes>
No description has been provided for this image
In [133]:
analyze_pca(final_param_dict, "avg_rating")
Analyzing PCA for avg_rating
No description has been provided for this image
No description has been provided for this image
<Figure size 5000x5000 with 0 Axes>
No description has been provided for this image
In [134]:
analyze_pca(final_param_dict, "chain")
Analyzing PCA for chain
No description has been provided for this image
<Figure size 5000x5000 with 0 Axes>
No description has been provided for this image
In [135]:
analyze_pca(final_param_dict, "price")
Analyzing PCA for price
No description has been provided for this image
<Figure size 5000x5000 with 0 Axes>
No description has been provided for this image
In [136]:
analyze_pca(final_param_dict, "famous_chain")
Analyzing PCA for famous_chain
No description has been provided for this image
<Figure size 5000x5000 with 0 Axes>
No description has been provided for this image
In [137]:
analyze_pca(final_param_dict, "county")
Analyzing PCA for county
No description has been provided for this image
<Figure size 5000x5000 with 0 Axes>
No description has been provided for this image

Training Classifier on Latents¶

In [138]:
def train_classifier(latents, category, categories):
    model = LogisticRegression()

    latents = np.array(latents)

    if category == "avg_rating":
        model = LinearRegression()
        model = model.fit(latents, categories)

        preds = model.predict(latents)
        r2 = r2_score(categories, preds)

        print(f"R^2 score for Lienar Regression to predict {category} is {r2}")

    elif category in ["chain", "price", "famous_chain"]:
        unique, counts = np.unique(categories, return_counts=True)
        most_common = unique[np.argmax(counts)]

        accuracy = np.sum(np.array(categories) == most_common) / len(categories)

        print(f"    Most Common       : {accuracy:.6f}")

        model = LogisticRegression()
        model = model.fit(latents, categories)

        preds = model.predict(latents)
        accuracy = np.sum(preds == categories) / preds.shape[0]

        print(f"    Logistic Regresion: {accuracy:.6f}")

        model = SVC()
        model = model.fit(latents, categories)

        preds = model.predict(latents)
        accuracy = np.sum(preds == categories) / preds.shape[0]

        print(f"    SVM               : {accuracy:.6f}")
In [139]:
def train_classifiers(param_dict, category):
    cafes = pd.read_csv("./datasets/processed/cafes.csv")
    cafe2index = get_cafe2index(cafes)

    name, _ = get_names(param_dict)
    latents = get_cafe_latents(name)
    cafe2latent = {gmap_id: latents[index].detach().numpy() for gmap_id, index in cafe2index.items()}

    cafe2category = get_category_mappings(cafes, category)

    latents, categories = [], []

    for gmap_id in cafes["gmap_id"].values:
        latents.append(cafe2latent[gmap_id])
        categories.append(cafe2category[gmap_id])

    train_classifier(latents, category, categories)
In [140]:
train_classifiers(final_param_dict, "chain")
    Most Common       : 0.525366
    Logistic Regresion: 0.568008
    SVM               : 0.731905
In [141]:
train_classifiers(final_param_dict, "famous_chain")
    Most Common       : 0.635141
    Logistic Regresion: 0.691778
    SVM               : 0.743057
In [142]:
train_classifiers(final_param_dict, "avg_rating")
R^2 score for Lienar Regression to predict avg_rating is 0.15401983261108398